// Mean (and optionally variance) normalisation of bottom[0] into (*top)[0].
//
// The input is treated as a `num` x `dim` matrix: one row per sample when
// mvn_param().across_channels() is set, otherwise one row per
// (sample, channel) pair.  Each row has its mean subtracted; when
// mvn_param().normalize_variance() is set each row is additionally divided
// by (standard deviation + eps).
//
// Side effects on member blobs:
//   mean_     - per-row mean
//   variance_ - per-row (standard deviation + eps), only when normalising
//               variance
//   temp_     - scratch (last broadcast matrix that was computed)
//
// The variance is computed from the already centred data, E((X-EX)^2),
// rather than E(X^2) - (EX)^2: the latter can come out slightly negative
// through cancellation, which makes the square root produce NaN.
void MVNLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    vector<Blob<Dtype>*>* top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = (*top)[0]->mutable_cpu_data();

  int num;
  if (this->layer_param_.mvn_param().across_channels())
    num = bottom[0]->num();
  else
    num = bottom[0]->num() * bottom[0]->channels();
  int dim = bottom[0]->count() / num;

  // Added to the standard deviation so the division below never hits zero.
  Dtype eps = 1e-10;

  // mean_ = EX (row-wise average of the input)
  caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, bottom_data,
      sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());
  // temp_ = -EX broadcast over every element of the row
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, -1.,
      mean_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
      temp_.mutable_cpu_data());
  // top = X - EX
  caffe_add(temp_.count(), bottom_data, temp_.cpu_data(), top_data);

  if (this->layer_param_.mvn_param().normalize_variance()) {
    // temp_ = (X - EX)^2
    caffe_powx(bottom[0]->count(), top_data, Dtype(2),
        temp_.mutable_cpu_data());
    // variance_ = E((X - EX)^2), non-negative by construction
    caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, temp_.cpu_data(),
        sum_multiplier_.cpu_data(), 0., variance_.mutable_cpu_data());

    // variance_ = sqrt(variance_) + eps
    caffe_powx(variance_.count(), variance_.cpu_data(), Dtype(0.5),
        variance_.mutable_cpu_data());
    caffe_add_scalar(variance_.count(), eps, variance_.mutable_cpu_data());

    // temp_ = per-row divisor broadcast over the row; top /= temp_
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
        variance_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
        temp_.mutable_cpu_data());
    caffe_div(temp_.count(), top_data, temp_.cpu_data(), top_data);
  }
}
// Called once the local gradients are available.  After the base class has
// reduced them, a gradient buffer is written to every other peer and the
// buffers read back from the peers are summed into this node's own slice of
// diff_ (own_size_ elements starting at own_offs_).
//
// Both passes visit the peers in ring order starting at rank_ + 1 and wrap
// to 0 at the end of peers_, so this node's own entry is never visited.
void SocketSyncCPU<Dtype>::on_gradients_ready() {
  // Reduce gradients from local CPU.
  P2PSyncCPU<Dtype>::on_gradients_ready();

  // Pass 1: send gradients to the corresponding parameter server nodes.
  int target = rank_ + 1;
  int visited = 0;
  while (visited < peers_.size() - 1) {
    if (target == peers_.size()) {
      target = 0;
    }
    diff_send_[target]->Write();
    ++target;
    ++visited;
  }

  // Pass 2: accumulate the gradients in the order they are read.
  target = rank_ + 1;
  visited = 0;
  while (visited < peers_.size() - 1) {
    if (target == peers_.size()) {
      target = 0;
    }
    SocketBuffer* received = diff_recv_[target]->Read();
    Dtype* incoming = reinterpret_cast<Dtype*>(received->addr());
    Dtype* accumulated = diff_ + own_offs_;
    // accumulated += incoming
    caffe_add(own_size_, incoming, accumulated, accumulated);
    ++target;
    ++visited;
  }
}
// Gradient of the weighted sigmoid cross-entropy loss w.r.t. bottom[0].
//
// Inputs read:
//   sigmoid_output_ - \hat{p}
//   bottom[1]       - target p
//   bottom[2]       - weight w
// Output written to bottom[0]'s diff:
//   ( 1/(2(1-w)) * (1-p) * \hat{p}  -  1/(2w) * p * (1-\hat{p}) )
//       * top[0]->cpu_diff()[0] / bottom[0]->num()
//
// Back-propagation to the label (bottom[1]) or weight (bottom[2]) inputs is
// not supported and is reported through LOG(FATAL).
//
// NOTE(review): the expression divides by w and by (1 - w); weights of
// exactly 0 or 1 are not guarded against here - confirm callers exclude them.
void SigmoidWeightedCrossEntropyLossLayer<Dtype>::Backward_cpu(
    const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[2]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to weight inputs.";
  }
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down[0]) {
    // First, compute the diff
    const int count = bottom[0]->count();
    const int num = bottom[0]->num();
    const Dtype* sigmoid_output_data = sigmoid_output_->cpu_data();
    const Dtype* target = bottom[1]->cpu_data();
    const Dtype* weight = bottom[2]->cpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();

    // Scratch space for two arrays of `count` elements.  Owned by a vector
    // so it is released on every exit path without a manual delete[].
    vector<Dtype> scratch(2 * count);
    Dtype* tmp = count > 0 ? &scratch[0] : NULL;
    Dtype* tmp1 = tmp + count;

    // diff: 1/2
    caffe_set(count, (Dtype)0.5, bottom_diff);
    // diff: 1/2 * \hat{p}
    caffe_mul(count, bottom_diff, sigmoid_output_data, bottom_diff);
    // diff: 1/2 * (1-p) * \hat{p}
    caffe_set(count, (Dtype)1, tmp1);
    caffe_sub(count, tmp1, target, tmp);
    caffe_mul(count, bottom_diff, tmp, bottom_diff);
    // diff: 1/2(1-w) * (1-p) * \hat{p}
    caffe_sub(count, tmp1, weight, tmp);
    caffe_div(count, bottom_diff, tmp, bottom_diff);

    // tmp: 1-\hat{p}
    caffe_sub(count, tmp1, sigmoid_output_data, tmp);
    // tmp: p * (1-\hat{p})
    caffe_mul(count, tmp, target, tmp);
    // tmp: -1/2 * p * (1-\hat{p})  (tmp1 is reused to hold the -1/2 factor)
    caffe_set(count, (Dtype)-0.5, tmp1);
    caffe_mul(count, tmp, tmp1, tmp);
    // tmp: -1/2w * p * (1-\hat{p})
    caffe_div(count, tmp, weight, tmp);

    // diff: -(1/2w * p * (1-\hat{p}) - 1/2(1-w) * (1-p) * \hat{p})
    caffe_add(count, bottom_diff, tmp, bottom_diff);

    // Scale down gradient
    const Dtype loss_weight = top[0]->cpu_diff()[0];
    caffe_scal(count, loss_weight / num, bottom_diff);
  }
}
// Back-propagates through the mean/variance normalisation into bottom[0].
//
// The data is treated as a `num` x `dim` matrix, with `num` chosen exactly
// as in the forward pass (per sample when across_channels() is set,
// otherwise per sample and channel).
//
// With normalize_variance(), writing y for top data and dy for top diff:
//   bottom_diff = ( dy - (rowsum(dy) + y * rowsum(y * dy)) / dim ) / variance_
// where variance_ holds whatever per-row divisor the forward pass stored.
// Without it:
//   bottom_diff = dy - rowmean(dy)
//
// mean_ and temp_ are overwritten as scratch.  A previous version also wrote
// the squares of the bottom data into temp_ just before the final broadcast;
// that result was immediately overwritten (gemm with beta = 0), so the call
// and the bottom-data pointer it needed have been dropped.
void MVNLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  const Dtype* top_diff = top[0]->cpu_diff();
  const Dtype* top_data = top[0]->cpu_data();
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();

  int num;
  if (this->layer_param_.mvn_param().across_channels())
    num = bottom[0]->num();
  else
    num = bottom[0]->num() * bottom[0]->channels();
  int dim = bottom[0]->count() / num;

  if (this->layer_param_.mvn_param().normalize_variance()) {
    // bottom_diff = y * dy
    caffe_mul(temp_.count(), top_data, top_diff, bottom_diff);
    // mean_ = rowsum(y * dy)
    caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., bottom_diff,
        sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());
    // bottom_diff = rowsum(y * dy) broadcast over each row
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
        mean_.cpu_data(), sum_multiplier_.cpu_data(), 0., bottom_diff);
    // bottom_diff = y * rowsum(y * dy)
    caffe_mul(temp_.count(), top_data, bottom_diff, bottom_diff);

    // mean_ = rowsum(dy)
    caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., top_diff,
        sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());
    // bottom_diff += rowsum(dy) broadcast over each row (beta = 1)
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
        mean_.cpu_data(), sum_multiplier_.cpu_data(), 1., bottom_diff);

    // bottom_diff = dy - bottom_diff / dim
    caffe_cpu_axpby(temp_.count(), Dtype(1), top_diff, Dtype(-1. / dim),
        bottom_diff);

    // temp_ = variance_ broadcast over each row; bottom_diff /= temp_
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
        variance_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
        temp_.mutable_cpu_data());
    caffe_div(temp_.count(), bottom_diff, temp_.cpu_data(), bottom_diff);
  } else {
    // mean_ = rowmean(dy)
    caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, top_diff,
        sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());
    // temp_ = -rowmean(dy) broadcast over each row
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, -1.,
        mean_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
        temp_.mutable_cpu_data());
    // bottom_diff = dy - rowmean(dy)
    caffe_add(temp_.count(), top_diff, temp_.cpu_data(), bottom_diff);
  }
}
// Accumulates gradients for bottom[0] and bottom[1]; bottom[2] is only read
// and is passed to multiplyAllChannelsByMask as its second argument.
//
// Both bottom diffs are zeroed first, regardless of propagate_down.  Then,
// for every sample n and every pooling field i, each requested input gets:
//   1. the *other* input masked for field i (into masked_buffer1/2),
//   2. a gemm of the top diff slice with that masked data
//      (into dlda_buffer / dldb_buffer),
//   3. the gemm result masked again with the same field,
//   4. the result added onto that sample's slice of the bottom diff.
void BilinearPatchFastLayer<Dtype>::Backward_cpu(
    const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  // Shapes of the three inputs.
  const int num = bottom[0]->num();
  const int a_channels = bottom[0]->channels();
  const int a_height = bottom[0]->height();
  const int a_width = bottom[0]->width();
  const int b_channels = bottom[1]->channels();
  const int b_height = bottom[1]->height();
  const int b_width = bottom[1]->width();

  // Element counts of one sample / one channel plane of each input.
  const int a_sample = a_channels * a_height * a_width;
  const int b_sample = b_channels * b_height * b_width;
  const int mask_sample =
      bottom[2]->channels() * bottom[2]->height() * bottom[2]->width();
  const int a_spatial = a_height * a_width;
  const int b_spatial = b_height * b_width;

  // Start from zero gradients; the loops below only ever add.
  caffe_set(num * a_channels * a_height * a_width, Dtype(0.0),
      bottom[0]->mutable_cpu_diff());
  caffe_set(bottom[1]->num() * b_channels * b_height * b_width, Dtype(0.0),
      bottom[1]->mutable_cpu_diff());

  for (int n = 0; n < num; n++) {
    for (int i = 0; i < poolingFieldsNum; i++) {
      if (propagate_down[0]) {
        // masked_buffer2 = bottom[1] sample n, masked for field i
        multiplyAllChannelsByMask(
            bottom[1]->cpu_data() + b_sample * n,
            bottom[2]->cpu_data() + mask_sample * n,
            i, masked_buffer2.mutable_cpu_data(),
            b_spatial, b_channels);
        // dlda_buffer.diff = top_diff_slice * masked_buffer2
        caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans,
            a_channels, a_width * a_height, b_channels, Dtype(1.),
            top[0]->cpu_diff() + n * top[0]->channels()
                + i * a_channels * b_channels,
            masked_buffer2.cpu_data(), Dtype(0.),
            dlda_buffer.mutable_cpu_diff());
        // Mask the gradient itself (in place in dlda_buffer).
        multiplyAllChannelsByMask(
            dlda_buffer.cpu_diff(),
            bottom[2]->cpu_data() + mask_sample * n,
            i, dlda_buffer.mutable_cpu_diff(),
            a_spatial, a_channels);
        // bottom[0].diff[n] += dlda_buffer.diff
        caffe_add(a_sample, dlda_buffer.cpu_diff(),
            bottom[0]->cpu_diff() + a_sample * n,
            bottom[0]->mutable_cpu_diff() + a_sample * n);
      }
      if (propagate_down[1]) {
        // masked_buffer1 = bottom[0] sample n, masked for field i
        multiplyAllChannelsByMask(
            bottom[0]->cpu_data() + a_sample * n,
            bottom[2]->cpu_data() + mask_sample * n,
            i, masked_buffer1.mutable_cpu_data(),
            a_spatial, a_channels);
        // dldb_buffer.diff = top_diff_slice^T * masked_buffer1
        caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans,
            b_channels, b_width * b_height, a_channels, Dtype(1.),
            top[0]->cpu_diff() + n * top[0]->channels()
                + i * a_channels * b_channels,
            masked_buffer1.cpu_data(), Dtype(0.),
            dldb_buffer.mutable_cpu_diff());
        // Mask the gradient itself (in place in dldb_buffer).
        multiplyAllChannelsByMask(
            dldb_buffer.cpu_diff(),
            bottom[2]->cpu_data() + mask_sample * n,
            i, dldb_buffer.mutable_cpu_diff(),
            b_spatial, b_channels);
        // bottom[1].diff[n] += dldb_buffer.diff
        caffe_add(b_sample, dldb_buffer.cpu_diff(),
            bottom[1]->cpu_diff() + b_sample * n,
            bottom[1]->mutable_cpu_diff() + b_sample * n);
      }
    }
  }
}
// Averages consecutive groups of frame_num rows (each of length dim) of
// bottom[0], bottom[1] and bottom[2] into ave_or, ave_si and ave_di
// respectively.  Group g of an input occupies elements
// [g * frame_num * dim, (g + 1) * frame_num * dim); its average is stored at
// offset g * dim of the matching output blob.
void TripletClipHingeLossLayer<Dtype>::
average_hashing(const vector<Blob<Dtype>*>& bottom) {
  const int batch_size = bottom[0]->num() / frame_num;
  const int total = batch_size * dim;

  // Clear the accumulators.
  caffe_set(total, Dtype(0.0), ave_or.mutable_cpu_data());
  caffe_set(total, Dtype(0.0), ave_si.mutable_cpu_data());
  caffe_set(total, Dtype(0.0), ave_di.mutable_cpu_data());

  for (int group = 0; group < batch_size; ++group) {
    const int out_offset = group * dim;

    // Sum the frames of this group into the accumulators.
    for (int frame = 0; frame < frame_num; ++frame) {
      const int in_offset = group * frame_num * dim + frame * dim;
      caffe_add(dim, bottom[0]->cpu_data() + in_offset,
          ave_or.cpu_data() + out_offset,
          ave_or.mutable_cpu_data() + out_offset);
      caffe_add(dim, bottom[1]->cpu_data() + in_offset,
          ave_si.cpu_data() + out_offset,
          ave_si.mutable_cpu_data() + out_offset);
      caffe_add(dim, bottom[2]->cpu_data() + in_offset,
          ave_di.cpu_data() + out_offset,
          ave_di.mutable_cpu_data() + out_offset);
    }

    // Turn the sums into means.
    caffe_scal(dim, 1 / Dtype(frame_num),
        ave_or.mutable_cpu_data() + out_offset);
    caffe_scal(dim, 1 / Dtype(frame_num),
        ave_si.mutable_cpu_data() + out_offset);
    caffe_scal(dim, 1 / Dtype(frame_num),
        ave_di.mutable_cpu_data() + out_offset);
  }
}
// Adds zero-mean Gaussian noise with standard deviation sigma_ to bottom[0]
// and writes the result to top[0].
//
//   sigma_ > 0              : rand_vec_ is refilled with fresh noise and
//                             top = bottom + rand_vec_.
//   sigma_ <= 0, in place   : nothing to do - the output blob already holds
//                             the input.  Previously this case fell through
//                             to the add below with whatever rand_vec_
//                             happened to contain, since nothing wrote to it
//                             in this call.
//   sigma_ <= 0, otherwise  : rand_vec_ is zeroed and top = bottom + 0.
//                             rand_vec_ is still cleared here because other
//                             members may read it - TODO confirm; a plain
//                             copy would do otherwise.
void NoiseLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  Dtype* rand_vec_data = rand_vec_.mutable_cpu_data();
  const int count = bottom[0]->count();

  if (sigma_ > 0) {
    caffe_rng_gaussian(count, Dtype(0), sigma_, rand_vec_data);
  } else if (bottom[0] == top[0]) {
    // In-place with no noise requested: leave the data untouched instead of
    // adding a buffer that was not initialised by this call.
    return;
  } else {
    caffe_set(count, Dtype(0), rand_vec_data);
  }

  caffe_add(count, rand_vec_data, bottom_data, top_data);
}
// The gradient of the single input is the element-wise sum of the gradients
// of all outputs.  Nothing is done when propagate_down[0] is false.
void SplitLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[0]) {
    if (top.size() == 1) {
      // Only one consumer: its gradient is passed through unchanged.
      caffe_copy(count_, top[0]->cpu_diff(), bottom[0]->mutable_cpu_diff());
    } else {
      // Seed the sum with the first two outputs ...
      caffe_add(count_, top[0]->cpu_diff(), top[1]->cpu_diff(),
          bottom[0]->mutable_cpu_diff());
      // ... then accumulate every remaining output on top of it.
      for (int k = 2; k < top.size(); ++k) {
        const Dtype* extra_diff = top[k]->cpu_diff();
        Dtype* accumulated = bottom[0]->mutable_cpu_diff();
        caffe_axpy(count_, Dtype(1.), extra_diff, accumulated);
      }
    }
  }
}
// Outside the TRAIN phase the input is copied to the output unchanged.
//
// In the TRAIN phase `mask` is added to the input:
//   * noise_param().has_gaussian_std(): mask is Gaussian with mean
//     noise_param().bias() and standard deviation
//     gaussian_std() * sqrt(sumsq_data() / count) of the input;
//   * otherwise, if has_uniform_range(): mask is uniform on
//     [bias() - uniform_range(), bias() + uniform_range()].
// NOTE(review): if neither option is set, mask is not refreshed by this call
// and its existing contents are added - confirm that is intended.
void NoiseLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* input = bottom[0]->cpu_data();
  Dtype* output = top[0]->mutable_cpu_data();
  const int count = bottom[0]->count();

  if (this->phase_ != TRAIN) {
    caffe_copy(count, input, output);
    return;
  }

  // Root-mean-square of the input, used to scale the Gaussian noise.
  Dtype data_magnitude =
      sqrt(bottom[0]->sumsq_data() / Dtype(bottom[0]->count()));

  if (this->layer_param_.noise_param().has_gaussian_std()) {
    caffe_rng_gaussian<Dtype>(count,
        this->layer_param_.noise_param().bias(),
        data_magnitude * this->layer_param_.noise_param().gaussian_std(),
        mask.mutable_cpu_data());
  } else if (this->layer_param_.noise_param().has_uniform_range()) {
    caffe_rng_uniform<Dtype>(count,
        this->layer_param_.noise_param().bias()
            - this->layer_param_.noise_param().uniform_range(),
        this->layer_param_.noise_param().bias()
            + this->layer_param_.noise_param().uniform_range(),
        mask.mutable_cpu_data());
  }

  caffe_add(count, input, mask.cpu_data(), output);
}
// Routes the top gradient back to the bottom blobs selected by the runtime
// sequence lengths.  For entry i of cap_sequence_param().sequence_lengths,
// the i-th slice (one slice = product of all bottom[0] dimensions after the
// first) of top[0]'s diff is added onto the same slice of
// bottom[sequence_lengths(i)]'s diff.
// NOTE(review): the length values are used directly as indices into
// `bottom` without a range check - confirm they are validated elsewhere.
void CapSequenceLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  // Snapshot the per-sequence lengths from the runtime parameters.
  vector<int> seq_lengths;
  const int length_count =
      this->runtime_param().cap_sequence_param().sequence_lengths_size();
  for (int k = 0; k < length_count; ++k) {
    seq_lengths.push_back(
        this->runtime_param().cap_sequence_param().sequence_lengths(k));
  }

  // Number of elements in one slice along the first axis.
  int slice_size = 1;
  for (int axis = 1; axis < bottom[0]->shape().size(); ++axis) {
    slice_size *= bottom[0]->shape(axis);
  }

  for (int seq = 0; seq < seq_lengths.size(); ++seq) {
    const int slice_offset = seq * slice_size;
    // bottom[len].diff[slice] += top[0].diff[slice]
    caffe_add(slice_size,
        bottom[seq_lengths[seq]]->cpu_diff() + slice_offset,
        top[0]->cpu_diff() + slice_offset,
        bottom[seq_lengths[seq]]->mutable_cpu_diff() + slice_offset);
  }
}
// Mean (and optionally variance) normalisation of bottom[0] into top[0].
//
// The input is viewed as a `rows` x `cols` matrix: one row per sample when
// mvn_param().across_channels() is set, otherwise one row per
// (sample, channel).  Each row has its mean removed and, when
// mvn_param().normalize_variance() is set, is divided by
// sqrt(E((X-EX)^2)) + eps_.
//
// mean_ receives the row means, variance_ the row divisors, and temp_ is
// used as scratch for the broadcast matrices.
void MVNLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* input = bottom[0]->cpu_data();
  Dtype* output = top[0]->mutable_cpu_data();

  int rows = bottom[0]->num();
  if (!this->layer_param_.mvn_param().across_channels()) {
    rows = bottom[0]->num() * bottom[0]->channels();
  }
  int cols = bottom[0]->count() / rows;

  // mean_ = EX
  caffe_cpu_gemv<Dtype>(CblasNoTrans, rows, cols, 1. / cols, input,
      sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());
  // temp_ = -EX, replicated across each row
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, rows, cols, 1, -1.,
      mean_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
      temp_.mutable_cpu_data());
  // output = X - EX
  caffe_add(temp_.count(), input, temp_.cpu_data(), output);

  if (!this->layer_param_.mvn_param().normalize_variance()) {
    return;
  }

  // temp_ = (X - EX)^2
  caffe_powx(bottom[0]->count(), output, Dtype(2), temp_.mutable_cpu_data());
  // variance_ = E((X - EX)^2)
  caffe_cpu_gemv<Dtype>(CblasNoTrans, rows, cols, 1. / cols,
      temp_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
      variance_.mutable_cpu_data());

  // variance_ = sqrt(variance_) + eps_
  caffe_powx(variance_.count(), variance_.cpu_data(), Dtype(0.5),
      variance_.mutable_cpu_data());
  caffe_add_scalar(variance_.count(), eps_, variance_.mutable_cpu_data());

  // temp_ = divisor replicated across each row; output /= temp_
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, rows, cols, 1, 1.,
      variance_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
      temp_.mutable_cpu_data());
  caffe_div(temp_.count(), output, temp_.cpu_data(), output);
}
// Forward pass.  For every sample the input is unrolled with im2col into
// col_buffer_; for every output channel that unrolled matrix is multiplied
// element-wise with the channel's weights (K_ * N_ values) and the product
// is summed over its K_ rows by multiplying with a 1 x K_ vector of ones,
// giving N_ outputs.  When bias_term_ is set, blobs_[1] (M_ * N_ values) is
// added to the sample's output.
void LocalLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  Dtype* col_data = col_buffer_.mutable_cpu_data();
  const Dtype* weights = this->blobs_[0]->cpu_data();
  const Dtype* input = bottom[0]->cpu_data();
  Dtype* output = top[0]->mutable_cpu_data();

  // 1 x K_ vector of ones, used to sum over the K_ dimension via gemm.
  Blob<Dtype> ones;
  ones.Reshape(1, 1, 1, K_);
  FillerParameter ones_spec;
  ones_spec.set_value(1);
  ConstantFiller<Dtype> ones_filler(ones_spec);
  ones_filler.Fill(&ones);

  // Scratch for the K_ x N_ element-wise product.
  Blob<Dtype> product;
  product.Reshape(1, 1, K_, N_);

  for (int sample = 0; sample < num_; sample++) {
    im2col_cpu(input + bottom[0]->offset(sample), channels_, height_,
        width_, kernel_size_, kernel_size_, pad_, pad_, stride_, stride_,
        col_data);

    for (int out = 0; out < num_output_; out++) {
      // product = col_data .* weights[out]
      caffe_mul(K_ * N_, col_data, weights + this->blobs_[0]->offset(out),
          product.mutable_cpu_data());
      // output[sample, out] = ones (1 x K_) * product (K_ x N_)
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, 1, N_, K_,
          Dtype(1.), ones.cpu_data(), product.cpu_data(), Dtype(0.),
          output + top[0]->offset(sample, out));
    }

    if (bias_term_) {
      // output[sample] += bias
      caffe_add(M_ * N_, this->blobs_[1]->cpu_data(),
          output + top[0]->offset(sample),
          output + top[0]->offset(sample));
    }
  }
}
// Element-wise in-place addition: this += source.
//
// Both tensors must agree in element count and in shape; a mismatch is
// reported through ASSERT.  The addition runs on the device selected by
// mode(); in a CPU_ONLY build the GPU branch expands to NO_GPU.
void Tensor<Dtype>::AddFrom(const Tensor& source) {
  if (source.count() != count_ || source.shape() != shape_) {
    ASSERT(false, "Trying to add blobs of different sizes: "
        << source.count() << " != " << count_);
  }

  switch (mode()) {
    case Caffe::CPU:
      caffe_add(count_, source.cpu_mem(), this->cpu_mem(),
          this->mutable_cpu_mem());
      return;

    case Caffe::GPU:
#ifndef CPU_ONLY
      caffe_gpu_add(count_, source.gpu_mem(), this->gpu_mem(),
          this->mutable_gpu_mem());
#else
      NO_GPU;
#endif
      return;

    default:
      ASSERT(false, "Unknown caffe mode.");
  }
}
// Batch-normalisation forward pass:
//   top = scale * (bottom - mean) * inv_std + shift
// with one mean / inverse-std / scale / shift value per channel.
//
// Parameter blobs: blobs_[0] scale, blobs_[1] shift, blobs_[2] moving-average
// mean, blobs_[3] moving average of the *inverse* standard deviation.
//
// When frozen_ is set or the phase is TEST the stored moving averages are
// used.  Otherwise the statistics are computed from the current batch and
// blended into the moving averages as
//   stored = (1 - bn_momentum_) * batch + bn_momentum_ * stored.
// Unless frozen_, the normalised activations and the inverse std are saved
// in x_norm_ / x_inv_std_.
//
// Per-channel vectors are expanded to full size in two gemm steps:
// channel -> (num x channel) via batch_sum_multiplier_, then
// (num*channel) -> every spatial position via spatial_sum_multiplier_.
void BNLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* input = bottom[0]->cpu_data();
  // Read-only and writable views of the same output blob.
  const Dtype* output_ro = top[0]->cpu_data();
  Dtype* output = top[0]->mutable_cpu_data();
  const Dtype* scale_data = this->blobs_[0]->cpu_data();
  const Dtype* shift_data = this->blobs_[1]->cpu_data();

  const int planes = num_ * channels_;      // number of (sample, channel) rows
  const int spatial_dim = height_ * width_; // elements per row
  const bool use_stored_stats = frozen_ || this->phase_ == TEST;

  // ---- Mean ----
  if (use_stored_stats) {
    // Use the moving average mean
    caffe_copy(batch_statistic_.count(), this->blobs_[2]->cpu_data(),
        batch_statistic_.mutable_cpu_data());
  } else {
    // Average over the spatial positions, then over the batch.
    caffe_cpu_gemv<Dtype>(CblasNoTrans, planes, spatial_dim,
        Dtype(1) / spatial_dim, input, spatial_sum_multiplier_.cpu_data(),
        Dtype(0), spatial_statistic_.mutable_cpu_data());
    caffe_cpu_gemv<Dtype>(CblasTrans, num_, channels_, Dtype(1) / num_,
        spatial_statistic_.cpu_data(), batch_sum_multiplier_.cpu_data(),
        Dtype(0), batch_statistic_.mutable_cpu_data());
    // Blend into the moving average.
    if (!frozen_) {
      caffe_cpu_axpby(batch_statistic_.count(), Dtype(1) - bn_momentum_,
          batch_statistic_.cpu_data(), bn_momentum_,
          this->blobs_[2]->mutable_cpu_data());
    }
  }
  // broadcast_buffer_ = -mean at every position
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1,
      Dtype(1), batch_sum_multiplier_.cpu_data(),
      batch_statistic_.cpu_data(), Dtype(0),
      spatial_statistic_.mutable_cpu_data());
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, planes, spatial_dim, 1,
      Dtype(-1), spatial_statistic_.cpu_data(),
      spatial_sum_multiplier_.cpu_data(), Dtype(0),
      broadcast_buffer_.mutable_cpu_data());
  // output = input - mean
  caffe_add(broadcast_buffer_.count(), input, broadcast_buffer_.cpu_data(),
      output);

  // ---- Inverse standard deviation ----
  if (use_stored_stats) {
    // Use the moving average variance
    caffe_copy(batch_statistic_.count(), this->blobs_[3]->cpu_data(),
        batch_statistic_.mutable_cpu_data());
  } else {
    // Mean of the squared, centred activations.
    caffe_powx(broadcast_buffer_.count(), output_ro, Dtype(2),
        broadcast_buffer_.mutable_cpu_data());
    caffe_cpu_gemv<Dtype>(CblasNoTrans, planes, spatial_dim,
        Dtype(1) / spatial_dim, broadcast_buffer_.cpu_data(),
        spatial_sum_multiplier_.cpu_data(), Dtype(0),
        spatial_statistic_.mutable_cpu_data());
    caffe_cpu_gemv<Dtype>(CblasTrans, num_, channels_, Dtype(1) / num_,
        spatial_statistic_.cpu_data(), batch_sum_multiplier_.cpu_data(),
        Dtype(0), batch_statistic_.mutable_cpu_data());
    // (variance + eps) ^ -0.5
    caffe_add_scalar(batch_statistic_.count(), bn_eps_,
        batch_statistic_.mutable_cpu_data());
    caffe_powx(batch_statistic_.count(), batch_statistic_.cpu_data(),
        Dtype(-0.5), batch_statistic_.mutable_cpu_data());
    // Blend into the moving average.
    if (!frozen_) {
      caffe_cpu_axpby(batch_statistic_.count(), Dtype(1) - bn_momentum_,
          batch_statistic_.cpu_data(), bn_momentum_,
          this->blobs_[3]->mutable_cpu_data());
    }
  }
  // broadcast_buffer_ = inverse std at every position
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1,
      Dtype(1), batch_sum_multiplier_.cpu_data(),
      batch_statistic_.cpu_data(), Dtype(0),
      spatial_statistic_.mutable_cpu_data());
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, planes, spatial_dim, 1,
      Dtype(1), spatial_statistic_.cpu_data(),
      spatial_sum_multiplier_.cpu_data(), Dtype(0),
      broadcast_buffer_.mutable_cpu_data());
  // output *= inverse std
  caffe_mul(broadcast_buffer_.count(), output_ro,
      broadcast_buffer_.cpu_data(), output);

  // Save the normalized inputs and std for backprop
  if (!frozen_) {
    caffe_copy(broadcast_buffer_.count(), output_ro,
        x_norm_.mutable_cpu_data());
    caffe_copy(batch_statistic_.count(), batch_statistic_.cpu_data(),
        x_inv_std_.mutable_cpu_data());
  }

  // ---- Scale ----
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1,
      Dtype(1), batch_sum_multiplier_.cpu_data(), scale_data, Dtype(0),
      spatial_statistic_.mutable_cpu_data());
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, planes, spatial_dim, 1,
      Dtype(1), spatial_statistic_.cpu_data(),
      spatial_sum_multiplier_.cpu_data(), Dtype(0),
      broadcast_buffer_.mutable_cpu_data());
  caffe_mul(broadcast_buffer_.count(), output_ro,
      broadcast_buffer_.cpu_data(), output);

  // ---- Shift ----
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1,
      Dtype(1), batch_sum_multiplier_.cpu_data(), shift_data, Dtype(0),
      spatial_statistic_.mutable_cpu_data());
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, planes, spatial_dim, 1,
      Dtype(1), spatial_statistic_.cpu_data(),
      spatial_sum_multiplier_.cpu_data(), Dtype(0),
      broadcast_buffer_.mutable_cpu_data());
  caffe_add(broadcast_buffer_.count(), output_ro,
      broadcast_buffer_.cpu_data(), output);
}
// Computes the AdaDelta update for parameter `param_id` and leaves it in
// that parameter's diff.
//
// history_[param_id] holds the running average of squared gradients and
// history_[net_params.size() + param_id] the running average of squared
// updates.  With g the incoming gradient:
//   grad_hist   = momentum * grad_hist + (1 - momentum) * g^2
//   update      = g * sqrt((update_hist + delta) / (grad_hist + delta))
//   update_hist = momentum * update_hist + (1 - momentum) * update^2
//   diff        = rate * params_lr[param_id] * update
// update_[param_id] and temp_[param_id] are used as scratch on the CPU path.
void AdaDeltaSolver<Dtype>::ComputeUpdateValue(int param_id, Dtype rate) {
  const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
  const vector<float>& net_params_lr = this->net_->params_lr();
  Dtype delta = this->param_.delta();
  Dtype momentum = this->param_.momentum();
  Dtype local_rate = rate * net_params_lr[param_id];
  // Index offset of the "squared update" histories inside history_.
  size_t update_history_offset = net_params.size();

  Blob<Dtype>* param = net_params[param_id];
  const int count = param->count();

  switch (Caffe::mode()) {
    case Caffe::CPU: {
      // update_ = g^2
      caffe_powx(count, param->cpu_diff(), Dtype(2),
          this->update_[param_id]->mutable_cpu_data());

      // grad_hist = (1 - momentum) * g^2 + momentum * grad_hist
      caffe_cpu_axpby(count, Dtype(1) - momentum,
          this->update_[param_id]->cpu_data(), momentum,
          this->history_[param_id]->mutable_cpu_data());

      // Add delta to both histories to guard against dividing by zero:
      //   update_ = update_hist + delta,  temp_ = grad_hist + delta
      caffe_set(count, delta, this->temp_[param_id]->mutable_cpu_data());
      caffe_add(count, this->temp_[param_id]->cpu_data(),
          this->history_[update_history_offset + param_id]->cpu_data(),
          this->update_[param_id]->mutable_cpu_data());
      caffe_add(count, this->temp_[param_id]->cpu_data(),
          this->history_[param_id]->cpu_data(),
          this->temp_[param_id]->mutable_cpu_data());

      // update_ = sqrt(update_ / temp_)  (ratio of the two RMS values)
      caffe_div(count, this->update_[param_id]->cpu_data(),
          this->temp_[param_id]->cpu_data(),
          this->update_[param_id]->mutable_cpu_data());
      caffe_powx(count, this->update_[param_id]->cpu_data(), Dtype(0.5),
          this->update_[param_id]->mutable_cpu_data());

      // diff = g * update_   (the un-scaled update)
      caffe_mul(count, param->cpu_diff(),
          this->update_[param_id]->cpu_data(), param->mutable_cpu_diff());

      // update_ = diff^2
      caffe_powx(count, param->cpu_diff(), Dtype(2),
          this->update_[param_id]->mutable_cpu_data());

      // update_hist = (1 - momentum) * diff^2 + momentum * update_hist
      caffe_cpu_axpby(count, Dtype(1) - momentum,
          this->update_[param_id]->cpu_data(), momentum,
          this->history_[update_history_offset + param_id]
              ->mutable_cpu_data());

      // Apply the learning rate.
      caffe_cpu_scale(count, local_rate, param->cpu_diff(),
          param->mutable_cpu_diff());
      break;
    }
    case Caffe::GPU: {
#ifndef CPU_ONLY
      adadelta_update_gpu(count, param->mutable_gpu_diff(),
          this->history_[param_id]->mutable_gpu_data(),
          this->history_[update_history_offset + param_id]
              ->mutable_gpu_data(),
          momentum, delta, local_rate);
#else
      NO_GPU;
#endif
      break;
    }
    default:
      LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
  }
}
// Element-wise combination of the bottom blobs into top[0].
//
// broadcast_ set: only bottom[0] and bottom[1] are combined, using the
//   broadcasting product / sum helpers on their first four dimensions.
//   NOTE(review): shape()[0..3] is read unconditionally - assumes both
//   inputs have at least four axes; confirm.
// otherwise:
//   PROD - product of all inputs.
//   SUM  - sum of coeffs_[i] * bottom[i]; a coefficient of exactly 1.0 uses
//          a plain copy / add instead of axpy.
//   MAX  - element-wise maximum; max_idx_ records which input supplied each
//          element.  On a tie between bottom[0] and bottom[1] index 1 is
//          recorded; later inputs win only when strictly greater.
void EltwiseLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  int* mask = NULL;
  const Dtype* bottom_data_a = NULL;
  const Dtype* bottom_data_b = NULL;
  const int count = top[0]->count();
  Dtype* top_data = top[0]->mutable_cpu_data();

  if (broadcast_) {
    int dima[4];
    int dimb[4];
    for (int axis = 0; axis < 4; axis++) {
      dima[axis] = bottom[0]->shape()[axis];
      dimb[axis] = bottom[1]->shape()[axis];
    }
    bottom_data_a = bottom[0]->cpu_data();
    bottom_data_b = bottom[1]->cpu_data();
    switch (op_) {
      case EltwiseParameter_EltwiseOp_PROD:
        caffe_mul_broadcast<Dtype>(dima, dimb, bottom_data_a, bottom_data_b,
            top_data);
        break;
      case EltwiseParameter_EltwiseOp_SUM:
        caffe_add_broadcast<Dtype>(dima, dimb, bottom_data_a, bottom_data_b,
            top_data);
        break;
      default:
        LOG(FATAL) << "Unknown elementwise broadcast operation.";
    }
    return;
  }

  switch (op_) {
    case EltwiseParameter_EltwiseOp_PROD:
      // top = bottom[0] * bottom[1], then fold in the remaining inputs.
      caffe_mul(count, bottom[0]->cpu_data(), bottom[1]->cpu_data(),
          top_data);
      for (int k = 2; k < bottom.size(); ++k) {
        caffe_mul(count, top_data, bottom[k]->cpu_data(), top_data);
      }
      break;

    case EltwiseParameter_EltwiseOp_SUM:
      // top = coeffs_[0] * bottom[0]
      if (coeffs_[0] == 1.0) {
        caffe_copy(count, bottom[0]->cpu_data(), top_data);
      } else {
        caffe_set(count, Dtype(0.), top_data);
        caffe_axpy(count, coeffs_[0], bottom[0]->cpu_data(), top_data);
      }
      // top += coeffs_[k] * bottom[k]
      for (int k = 1; k < bottom.size(); ++k) {
        if (coeffs_[k] == 1.0) {
          caffe_add(count, top_data, bottom[k]->cpu_data(), top_data);
        } else {
          caffe_axpy(count, coeffs_[k], bottom[k]->cpu_data(), top_data);
        }
      }
      break;

    case EltwiseParameter_EltwiseOp_MAX:
      // Initialize
      mask = max_idx_.mutable_cpu_data();
      caffe_set(count, -1, mask);
      caffe_set(count, Dtype(-FLT_MAX), top_data);

      // bottom 0 & 1
      bottom_data_a = bottom[0]->cpu_data();
      bottom_data_b = bottom[1]->cpu_data();
      for (int idx = 0; idx < count; ++idx) {
        const bool first_wins = bottom_data_a[idx] > bottom_data_b[idx];
        top_data[idx] = first_wins ? bottom_data_a[idx] : bottom_data_b[idx];
        mask[idx] = first_wins ? 0 : 1;
      }

      // bottom 2++
      for (int blob_idx = 2; blob_idx < bottom.size(); ++blob_idx) {
        bottom_data_b = bottom[blob_idx]->cpu_data();
        for (int idx = 0; idx < count; ++idx) {
          if (bottom_data_b[idx] > top_data[idx]) {
            top_data[idx] = bottom_data_b[idx];  // maxval
            mask[idx] = blob_idx;                // maxid
          }
        }
      }
      break;

    default:
      LOG(FATAL) << "Unknown elementwise operation.";
  }
}
// Batch-normalisation forward pass:
//   top = scale * (bottom - mean) / sqrt(variance + var_eps_) + shift
// with one mean / variance / scale / shift value per channel.
//
// Parameter blobs: blobs_[0] scale, blobs_[1] shift, blobs_[2] history mean,
// blobs_[3] history variance.
//
// Batch statistics are always computed.  In the TRAIN phase they are blended
// into the history as
//   history = decay_ * batch + (1 - decay_) * history.
// In the TEST phase with moving_average_ set, the history values replace the
// batch statistics before they are applied.  The normalised activations and
// the standard deviation are stored in x_norm_ / x_std_.
//
// Per-channel vectors are expanded to full size in two gemm steps:
// channel -> (num x channel) via batch_sum_multiplier_, then
// (num*channel) -> every spatial position via spatial_sum_multiplier_.
void BNLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* input = bottom[0]->cpu_data();
  // Read-only and writable views of the same output blob.
  const Dtype* output_ro = top[0]->cpu_data();
  Dtype* output = top[0]->mutable_cpu_data();
  const Dtype* scale_data = this->blobs_[0]->cpu_data();
  const Dtype* shift_data = this->blobs_[1]->cpu_data();

  const int planes = num_ * channels_;      // number of (sample, channel) rows
  const int spatial_dim = height_ * width_; // elements per row
  const bool training = this->phase_ == TRAIN;
  const bool use_history = this->phase_ == TEST && moving_average_;

  // ---------- mean subtraction ---------- //
  // Average over the spatial positions, then over the batch.
  caffe_cpu_gemv<Dtype>(CblasNoTrans, planes, spatial_dim,
      Dtype(1. / spatial_dim), input, spatial_sum_multiplier_.cpu_data(),
      Dtype(0), spatial_statistic_.mutable_cpu_data());
  caffe_cpu_gemv<Dtype>(CblasTrans, num_, channels_, Dtype(1. / num_),
      spatial_statistic_.cpu_data(), batch_sum_multiplier_.cpu_data(),
      Dtype(0), batch_statistic_.mutable_cpu_data());
  if (training) {
    // save history mean
    caffe_cpu_axpby(batch_statistic_.count(), decay_,
        batch_statistic_.cpu_data(), Dtype(1) - decay_,
        this->blobs_[2]->mutable_cpu_data());
  }
  if (use_history) {
    // use moving average mean
    caffe_copy(batch_statistic_.count(), this->blobs_[2]->cpu_data(),
        batch_statistic_.mutable_cpu_data());
  }
  // buffer_blob_ = -mean at every position
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1,
      Dtype(1), batch_sum_multiplier_.cpu_data(),
      batch_statistic_.cpu_data(), Dtype(0),
      spatial_statistic_.mutable_cpu_data());
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, planes, spatial_dim, 1,
      Dtype(-1), spatial_statistic_.cpu_data(),
      spatial_sum_multiplier_.cpu_data(), Dtype(0),
      buffer_blob_.mutable_cpu_data());
  // output = input - mean
  caffe_add(buffer_blob_.count(), input, buffer_blob_.cpu_data(), output);

  // ---------- variance normalization ---------- //
  // buffer_blob_ = (X - mean)^2
  caffe_powx(buffer_blob_.count(), output_ro, Dtype(2),
      buffer_blob_.mutable_cpu_data());
  // Average over the spatial positions, then over the batch.
  caffe_cpu_gemv<Dtype>(CblasNoTrans, planes, spatial_dim,
      Dtype(1. / spatial_dim), buffer_blob_.cpu_data(),
      spatial_sum_multiplier_.cpu_data(), Dtype(0),
      spatial_statistic_.mutable_cpu_data());
  caffe_cpu_gemv<Dtype>(CblasTrans, num_, channels_, Dtype(1. / num_),
      spatial_statistic_.cpu_data(), batch_sum_multiplier_.cpu_data(),
      Dtype(0), batch_statistic_.mutable_cpu_data());
  if (training) {
    // save history variance
    caffe_cpu_axpby(batch_statistic_.count(), decay_,
        batch_statistic_.cpu_data(), Dtype(1) - decay_,
        this->blobs_[3]->mutable_cpu_data());
  }
  if (use_history) {
    // use moving average variance
    caffe_copy(batch_statistic_.count(), this->blobs_[3]->cpu_data(),
        batch_statistic_.mutable_cpu_data());
  }
  // std = sqrt(variance + eps)
  caffe_add_scalar(batch_statistic_.count(), var_eps_,
      batch_statistic_.mutable_cpu_data());
  caffe_powx(batch_statistic_.count(), batch_statistic_.cpu_data(),
      Dtype(0.5), batch_statistic_.mutable_cpu_data());
  // buffer_blob_ = std at every position
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1,
      Dtype(1), batch_sum_multiplier_.cpu_data(),
      batch_statistic_.cpu_data(), Dtype(0),
      spatial_statistic_.mutable_cpu_data());
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, planes, spatial_dim, 1,
      Dtype(1), spatial_statistic_.cpu_data(),
      spatial_sum_multiplier_.cpu_data(), Dtype(0),
      buffer_blob_.mutable_cpu_data());
  // output /= std
  caffe_div(buffer_blob_.count(), output_ro, buffer_blob_.cpu_data(),
      output);

  // ---------- save x_norm and x_std ---------- //
  caffe_copy(buffer_blob_.count(), output_ro, x_norm_.mutable_cpu_data());
  caffe_copy(batch_statistic_.count(), batch_statistic_.cpu_data(),
      x_std_.mutable_cpu_data());

  // ---------- scale ---------- //
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1,
      Dtype(1), batch_sum_multiplier_.cpu_data(), scale_data, Dtype(0),
      spatial_statistic_.mutable_cpu_data());
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, planes, spatial_dim, 1,
      Dtype(1), spatial_statistic_.cpu_data(),
      spatial_sum_multiplier_.cpu_data(), Dtype(0),
      buffer_blob_.mutable_cpu_data());
  caffe_mul(buffer_blob_.count(), output_ro, buffer_blob_.cpu_data(),
      output);

  // ---------- shift ---------- //
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1,
      Dtype(1), batch_sum_multiplier_.cpu_data(), shift_data, Dtype(0),
      spatial_statistic_.mutable_cpu_data());
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, planes, spatial_dim, 1,
      Dtype(1), spatial_statistic_.cpu_data(),
      spatial_sum_multiplier_.cpu_data(), Dtype(0),
      buffer_blob_.mutable_cpu_data());
  caffe_add(buffer_blob_.count(), output_ro, buffer_blob_.cpu_data(),
      output);
}
// CPU forward pass of the batch-normalisation layer.
//
// blobs_[0] supplies the per-channel scale and blobs_[1] the per-channel shift.
//  * LEARN mode: computes the per-channel mean and variance of bottom[0] over
//    the batch and spatial axes (var = E[x^2] - E[x]^2), optionally exports
//    them to top[1] / top[2], normalises the input, stores the normalised
//    activations in x_norm_, then applies scale and shift.
//  * INFERENCE mode: applies scale and shift directly to bottom[0].
// Any other mode aborts via LOG(FATAL).
//
// Per-channel vectors are broadcast to the full blob in two rank-1 GEMM steps:
// first to one value per (sample, channel), then across the spatial extent.
void BNLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* input = bottom[0]->cpu_data();
  Dtype* output = top[0]->mutable_cpu_data();
  const Dtype* output_ro = top[0]->cpu_data();
  const Dtype* scale = this->blobs_[0]->cpu_data();
  const Dtype* shift = this->blobs_[1]->cpu_data();

  // Matrix view used throughout: (N_ * C_) rows by (H_ * W_) columns.
  const int plane_count = N_ * C_;
  const int plane_size = H_ * W_;

  switch (this->layer_param_.bn_param().bn_mode()) {
  case BNParameter_BNMode_LEARN:
    // buffer_blob_ <- x^2
    caffe_powx(bottom[0]->count(), input, Dtype(2),
        buffer_blob_.mutable_cpu_data());

    // E[x]: average over the spatial extent, then over the batch.
    caffe_cpu_gemv<Dtype>(CblasNoTrans, plane_count, plane_size,
        Dtype(1. / plane_size), input, spatial_sum_multiplier_.cpu_data(),
        Dtype(0), spatial_mean_.mutable_cpu_data());
    caffe_cpu_gemv<Dtype>(CblasTrans, N_, C_, Dtype(1. / N_),
        spatial_mean_.cpu_data(), batch_sum_multiplier_.cpu_data(), Dtype(0),
        batch_mean_.mutable_cpu_data());

    // E[x^2]: same two-stage reduction applied to the squared input.
    caffe_cpu_gemv<Dtype>(CblasNoTrans, plane_count, plane_size,
        Dtype(1. / plane_size), buffer_blob_.cpu_data(),
        spatial_sum_multiplier_.cpu_data(), Dtype(0),
        spatial_variance_.mutable_cpu_data());
    caffe_cpu_gemv<Dtype>(CblasTrans, N_, C_, Dtype(1. / N_),
        spatial_variance_.cpu_data(), batch_sum_multiplier_.cpu_data(),
        Dtype(0), batch_variance_.mutable_cpu_data());

    // variance = E[x^2] - (E[x])^2
    caffe_powx(batch_mean_.count(), batch_mean_.cpu_data(), Dtype(2),
        buffer_blob_.mutable_cpu_data());
    caffe_sub(batch_mean_.count(), batch_variance_.cpu_data(),
        buffer_blob_.cpu_data(), batch_variance_.mutable_cpu_data());

    // Optional extra outputs: top[1] = batch mean, top[2] = batch variance.
    if (top.size() > 1) {
      caffe_copy(batch_mean_.count(), batch_mean_.cpu_data(),
          top[1]->mutable_cpu_data());
    }
    if (top.size() > 2) {
      caffe_copy(batch_variance_.count(), batch_variance_.cpu_data(),
          top[2]->mutable_cpu_data());
    }

    // output = x - mean (the mean is broadcast with a factor of -1, then added).
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
        batch_sum_multiplier_.cpu_data(), batch_mean_.cpu_data(), Dtype(0),
        spatial_mean_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, plane_count, plane_size,
        1, Dtype(-1), spatial_mean_.cpu_data(),
        spatial_sum_multiplier_.cpu_data(), Dtype(0),
        buffer_blob_.mutable_cpu_data());
    caffe_add(buffer_blob_.count(), input, buffer_blob_.cpu_data(), output);

    // output /= sqrt(variance + var_eps_)
    caffe_add_scalar(batch_variance_.count(), var_eps_,
        batch_variance_.mutable_cpu_data());
    caffe_powx(batch_variance_.count(), batch_variance_.cpu_data(),
        Dtype(0.5), batch_variance_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
        batch_sum_multiplier_.cpu_data(), batch_variance_.cpu_data(),
        Dtype(0), spatial_variance_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, plane_count, plane_size,
        1, Dtype(1), spatial_variance_.cpu_data(),
        spatial_sum_multiplier_.cpu_data(), Dtype(0),
        buffer_blob_.mutable_cpu_data());
    caffe_div(buffer_blob_.count(), output_ro, buffer_blob_.cpu_data(),
        output);

    // Keep the normalised activations in x_norm_.
    caffe_copy(buffer_blob_.count(), output_ro, x_norm_.mutable_cpu_data());

    // output *= scale (broadcast per channel)
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
        batch_sum_multiplier_.cpu_data(), scale, Dtype(0),
        spatial_variance_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, plane_count, plane_size,
        1, Dtype(1), spatial_variance_.cpu_data(),
        spatial_sum_multiplier_.cpu_data(), Dtype(0),
        buffer_blob_.mutable_cpu_data());
    caffe_mul(buffer_blob_.count(), output, buffer_blob_.cpu_data(), output);

    // output += shift (broadcast per channel)
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
        batch_sum_multiplier_.cpu_data(), shift, Dtype(0),
        spatial_mean_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, plane_count, plane_size,
        1, Dtype(1), spatial_mean_.cpu_data(),
        spatial_sum_multiplier_.cpu_data(), Dtype(0),
        buffer_blob_.mutable_cpu_data());
    caffe_add(buffer_blob_.count(), output_ro, buffer_blob_.cpu_data(),
        output);
    break;

  case BNParameter_BNMode_INFERENCE:
    // output = x * scale (broadcast per channel)
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
        batch_sum_multiplier_.cpu_data(), scale, Dtype(0),
        spatial_variance_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, plane_count, plane_size,
        1, Dtype(1), spatial_variance_.cpu_data(),
        spatial_sum_multiplier_.cpu_data(), Dtype(0),
        buffer_blob_.mutable_cpu_data());
    caffe_mul(buffer_blob_.count(), input, buffer_blob_.cpu_data(), output);

    // output += shift (broadcast per channel)
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
        batch_sum_multiplier_.cpu_data(), shift, Dtype(0),
        spatial_mean_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, plane_count, plane_size,
        1, Dtype(1), spatial_mean_.cpu_data(),
        spatial_sum_multiplier_.cpu_data(), Dtype(0),
        buffer_blob_.mutable_cpu_data());
    caffe_add(buffer_blob_.count(), output_ro, buffer_blob_.cpu_data(),
        output);
    break;

  default:
    LOG(FATAL) << "Unknown BN mode.";
  }
}
// CPU backward pass of the CRF loss layer.
//
// Aborts if a gradient with respect to the labels (bottom[1]) is requested.
// When propagate_down[0] is set, for every instance it:
//   1. adds 1 to the state-error buffer (gamma_) at each observed label and
//      to the transition-error buffer (epsilon_) at each observed label pair;
//   2. writes the gradient for bottom[0] from mu (blobs_[2]) and the state
//      errors;
//   3. accumulates the parameter gradients of pi (blobs_[0]), tr (blobs_[1])
//      and mu (blobs_[2]).
//
// Per-instance strides:
//   feat_stride  = max_seq_length_ * feature_num_            (bottom[0])
//   state_stride = max_seq_length_ * state_num_              (gamma_)
//   trans_stride = max_seq_length_ * state_num_ * state_num_ (epsilon_)
//
// @param top             unused.
// @param propagate_down  [0]: features, [1]: labels (must be false).
// @param bottom          [0]: feature table, [1]: label sequence.
void CRFWithLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (!propagate_down[0]) {
    return;
  }

  // The backward flow splits in two: into the layer's own parameters and
  // into the lower layer through the diff of bottom[0].
  Dtype* ptr_pi_diff = this->blobs_[0]->mutable_cpu_diff();
  Dtype* ptr_tr_diff = this->blobs_[1]->mutable_cpu_diff();
  Dtype* ptr_mu_diff = this->blobs_[2]->mutable_cpu_diff();
  Dtype* ptr_bottom_diff = bottom[0]->mutable_cpu_diff();
  Dtype* ptr_state_err = gamma_.mutable_cpu_data();
  Dtype* ptr_trans_err = epsilon_.mutable_cpu_data();

  // Read-only views.
  const Dtype* state_err = gamma_.cpu_data();
  const Dtype* trans_err = epsilon_.cpu_data();
  const Dtype* feature_table = bottom[0]->cpu_data();
  const Dtype* label = bottom[1]->cpu_data();
  const Dtype* mu = this->blobs_[2]->cpu_data();
  const Dtype* pi_diff = this->blobs_[0]->cpu_diff();

  const int trans_size = state_num_ * state_num_;
  const int feat_stride = max_seq_length_ * feature_num_;
  const int state_stride = max_seq_length_ * state_num_;
  const int trans_stride = max_seq_length_ * trans_size;

  for (int i = 0; i < num_; ++i) {
    // TODO: every instance is treated as having the maximum sequence length;
    // per-instance lengths still need to be handled.
    const int sl = max_seq_length_;

    const Dtype* inst_label = label + i * max_seq_length_;
    Dtype* inst_state_err = ptr_state_err + i * state_stride;
    // Fixed: this offset used the state stride, although the buffer is read
    // back below with the transition stride.
    Dtype* inst_trans_err = ptr_trans_err + i * trans_stride;

    // Add the observed label / label-pair counts at every position.
    for (int j = 0; j < sl; ++j) {
      const int idx = static_cast<int>(inst_label[j]);
      const bool idx_valid = idx >= 0 && idx < state_num_;
      if (idx_valid) {
        inst_state_err[j * state_num_ + idx] += 1;
      }
      // TODO: labels outside [0, state_num_) are silently ignored.

      if (j >= sl - 1) {
        continue;  // the last position has no outgoing transition
      }
      const int idx_next = static_cast<int>(inst_label[j + 1]);
      if (idx_valid && idx_next >= 0 && idx_next < state_num_) {
        inst_trans_err[j * trans_size + idx * state_num_ + idx_next] += 1;
      }
    }

    // Backward to the input blob: bottom_diff = mu' * state_err'.
    caffe_cpu_gemm(CblasTrans, CblasTrans, feature_num_, sl, state_num_,
        Dtype(1), mu, state_err + i * state_stride, Dtype(0),
        ptr_bottom_diff + i * feat_stride);

    // Backward to pi: pi_diff += state_err at position 0.
    caffe_add(state_num_, pi_diff, state_err + i * state_stride, ptr_pi_diff);

    // Backward to mu: mu_diff += state_err' * bottom[0]'.
    // Fixed: the feature table is offset by the feature stride, the same
    // stride used for bottom[0]'s diff above.
    caffe_cpu_gemm(CblasTrans, CblasTrans, state_num_, feature_num_, sl,
        Dtype(1), state_err + i * state_stride,
        feature_table + i * feat_stride, Dtype(1), ptr_mu_diff);

    // Backward to tr: tr_diff += sum over positions of the transition errors.
    // Fixed: beta is 1 so every instance contributes; with beta = 0 each
    // iteration overwrote the previous one. This matches the accumulation
    // used for pi and mu.
    // NOTE(review): the indices written above lay each instance out as
    // [position][from][to], whereas this NoTrans call with
    // (state_num_^2 x sl) dimensions reads the buffer as [from*to][position].
    // Confirm against the layout epsilon_ is filled with in the forward pass.
    caffe_cpu_gemv(CblasNoTrans, trans_size, sl, Dtype(1),
        trans_err + i * trans_stride, multiplier_seq_len_.cpu_data(),
        Dtype(1), ptr_tr_diff);
  }
}
// CPU backward pass of the triplet clip hinge loss.
//
// Runs only when propagate_down[0] is set. For each of the three bottom
// streams and each sample j in that stream, the gradient is
//     2 * (lamda * triplet_term + (1 - lamda) * structure_term)
// where
//   * triplet_term is 2/num times a difference of two averaged codes when
//     dist_sq_[j / frame_num] exceeds FLT_MIN, otherwise a vector filled with
//     FLT_MIN. The difference depends on the stream:
//         stream 0: ave_di - ave_si
//         stream 1: ave_si - ave_or
//         stream 2: ave_or - ave_di
//   * structure_term is whatever compute_gradient_structure(stream, j) leaves
//     in gradient_structure.
// The first `channels` values of the result are copied into row j of the
// stream's diff.
void TripletClipHingeLossLayer<Dtype>::Backward_cpu(
    const vector<Blob<Dtype>*>& top, const vector<bool> &propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (!propagate_down[0]) {
    return;
  }

  for (int stream = 0; stream < 3; ++stream) {
    const int num = bottom[stream]->num();
    const int channels = bottom[stream]->channels();

    for (int j = 0; j < num; ++j) {
      Dtype* stream_diff = bottom[stream]->mutable_cpu_diff();

      // Averaged codes of the group this sample belongs to.
      const Dtype* original_code = ave_or.cpu_data() + (j / frame_num)*dim;
      const Dtype* similar_code = ave_si.cpu_data() + (j / frame_num)*dim;
      const Dtype* different_code = ave_di.cpu_data() + (j / frame_num)*dim;

      // Pick the operands of the triplet difference for this stream.
      const Dtype* minuend;
      const Dtype* subtrahend;
      switch (stream) {
        case 0:
          minuend = different_code;
          subtrahend = similar_code;
          break;
        case 1:
          minuend = similar_code;
          subtrahend = original_code;
          break;
        default:  // stream == 2
          minuend = original_code;
          subtrahend = different_code;
          break;
      }

      if (dist_sq_.cpu_data()[j / frame_num] > Dtype(FLT_MIN)) {
        caffe_sub(dim, minuend, subtrahend,
            gradient_triplet.mutable_cpu_data());
        caffe_scal(dim, Dtype(2) / Dtype(num),
            gradient_triplet.mutable_cpu_data());
      } else {
        caffe_set(dim, Dtype(FLT_MIN), gradient_triplet.mutable_cpu_data());
      }

      compute_gradient_structure(stream, j);

      // Blend the two terms with weights lamda and (1 - lamda).
      caffe_scal(dim, lamda, gradient_triplet.mutable_cpu_data());
      caffe_scal(dim, Dtype(1.0) - lamda,
          gradient_structure.mutable_cpu_data());
      caffe_add(dim, gradient_triplet.cpu_data(),
          gradient_structure.cpu_data(), gradient.mutable_cpu_data());

      caffe_scal(dim, Dtype(2.0), gradient.mutable_cpu_data());
      // Write the result into row j of this stream's diff.
      caffe_copy(channels, gradient.cpu_data(), stream_diff + (j*channels));
    }
  }
}
// CPU backward pass: runs the plain InnerProductLayer backward step, then adds
// the diff of top[1] element-wise onto the diff of blobs_[0].
// top[1]->count() elements are added.
void InnerProductForRegularizeLayer<Dtype>::Backward_cpu(
    const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  InnerProductLayer<Dtype>::Backward_cpu(top, propagate_down, bottom);

  Blob<Dtype>& first_param = *this->blobs_[0];
  caffe_add(top[1]->count(),
      top[1]->cpu_diff(),
      first_param.cpu_diff(),
      first_param.mutable_cpu_diff());
}
// CPU backward pass of the locally-connected layer.
//
// Both parameter diffs are zeroed first and then rebuilt from the whole batch:
//   * bias diff (only when bias_term_): sum of the top diff over all samples;
//   * weight diff: for every sample and output map, the top diff multiplied
//     element-wise with each of the K_ rows of the im2col buffer;
//   * bottom diff (only when propagate_down[0]): the top diff multiplied with
//     the matching weight row, accumulated into the column buffer and folded
//     back into image layout with col2im.
void LocalLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  const Dtype* out_grad = top[0]->cpu_diff();
  const Dtype* in_data = bottom[0]->cpu_data();
  Dtype* in_grad = bottom[0]->mutable_cpu_diff();
  Dtype* col_data = col_buffer_.mutable_cpu_data();
  Dtype* col_grad = col_buffer_.mutable_cpu_diff();
  const Dtype* weights = this->blobs_[0]->cpu_data();
  Dtype* weights_grad = this->blobs_[0]->mutable_cpu_diff();

  // Scratch space: one row of N_ products, and a K_ x N_ block of products.
  Blob<Dtype> row_scratch;
  row_scratch.Reshape(1, 1, 1, N_);
  Blob<Dtype> block_scratch;
  block_scratch.Reshape(1, 1, K_, N_);
  Dtype* block_data = block_scratch.mutable_cpu_data();

  if (bias_term_) {
    Dtype* bias_grad = this->blobs_[1]->mutable_cpu_diff();
    memset(bias_grad, 0, sizeof(Dtype) * this->blobs_[1]->count());
    for (int n = 0; n < num_; ++n) {
      caffe_add(M_ * N_, bias_grad, out_grad + top[0]->offset(n), bias_grad);
    }
  }

  memset(weights_grad, 0, sizeof(Dtype) * this->blobs_[0]->count());

  for (int n = 0; n < num_; ++n) {
    im2col_cpu(in_data + bottom[0]->offset(n), channels_, height_, width_,
        kernel_size_, kernel_size_, pad_, pad_, stride_, stride_, col_data);

    // Gradient with respect to the weights.
    for (int m = 0; m < num_output_; ++m) {
      const Dtype* out_grad_map = out_grad + top[0]->offset(n, m);
      Dtype* map_weights_grad = weights_grad + this->blobs_[0]->offset(m);
      for (int k = 0; k < K_; ++k) {
        caffe_mul(N_, out_grad_map, col_data + col_buffer_.offset(0, k),
            block_data + block_scratch.offset(0, 0, k));
      }
      caffe_cpu_axpby(K_ * N_, Dtype(1.0), block_data, Dtype(1.0),
          map_weights_grad);
    }

    // Gradient with respect to the bottom data.
    if (!propagate_down[0]) {
      continue;
    }
    memset(col_grad, 0, col_buffer_.count() * sizeof(Dtype));
    for (int m = 0; m < num_output_; ++m) {
      const Dtype* out_grad_map = out_grad + top[0]->offset(n, m);
      for (int k = 0; k < K_; ++k) {
        caffe_mul(N_, out_grad_map, weights + this->blobs_[0]->offset(m, 0, k),
            row_scratch.mutable_cpu_data());
        caffe_cpu_axpby(N_, Dtype(1.0), row_scratch.cpu_data(), Dtype(1.0),
            col_grad + col_buffer_.offset(0, k));
      }
    }
    // Fold the column gradient back into image layout.
    col2im_cpu(col_grad, channels_, height_, width_, kernel_size_,
        kernel_size_, pad_, pad_, stride_, stride_,
        in_grad + bottom[0]->offset(n));
  }
}
// CPU backward pass of the normalised deconvolution layer.
//
// The arithmetic below is consistent with a forward pass of the form
// top = deconv2_output / deconv1_output (plus an optional bias), where the
// deconv1 output holds a single sample that is shared by the whole batch.
// NOTE(review): inferred from the gradient formulas only; confirm against
// the forward implementation.
//
// Steps, in order:
//   1. zero the diffs of both internal deconvolution outputs, of the exp
//      layer output and of both internal deconvolution weight blobs;
//   2. accumulate the bias gradient (blobs_[2]) when enabled;
//   3. push top_diff / deconv1_output through deconv2_layer, which also
//      propagates to `bottom`;
//   4. weight gradient (blobs_[0]) = weights_alphas diff * alphas;
//   5. when average_train is set, the alpha gradient: the weights_alphas diff
//      times the weights summed over input channels, minus the diff that
//      deconv1_layer's backward pass produces, then passed through exp_layer.
void DeconvNormLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  const Dtype* out_grad = top[0]->cpu_diff();
  Dtype* denom_grad = deconv1_top_vec[0]->mutable_cpu_diff();
  Dtype* numer_grad = deconv2_top_vec[0]->mutable_cpu_diff();
  const Dtype* numer_data = deconv2_top_vec[0]->cpu_data();
  const Dtype* denom_data = deconv1_top_vec[0]->cpu_data();

  // Reset every intermediate gradient this pass accumulates into.
  caffe_set(deconv2_top_vec[0]->count(), (Dtype)0, numer_grad);
  caffe_set(deconv1_top_vec[0]->count(), (Dtype)0, denom_grad);
  caffe_set(exp_top_vec[0]->count(), (Dtype)0,
      exp_top_vec[0]->mutable_cpu_diff());
  caffe_set(deconv1_layer->blobs()[0]->count(), (Dtype)0,
      deconv1_layer->blobs()[0]->mutable_cpu_diff());
  caffe_set(deconv2_layer->blobs()[0]->count(), (Dtype)0,
      deconv2_layer->blobs()[0]->mutable_cpu_diff());

  // Bias gradient, if necessary.
  if (this->bias_term_ && this->param_propagate_down_[2]) {
    Dtype* bias_grad = this->blobs_[2]->mutable_cpu_diff();
    for (int n = 0; n < top[0]->num(); ++n) {
      caffe_cpu_gemv<Dtype>(CblasNoTrans, top[0]->channels(),
          top[0]->height() * top[0]->width(), 1.,
          out_grad + top[0]->offset(n), bias_multiplier.cpu_data(), 1.,
          bias_grad);
    }
  }

  // Weight and alpha gradients, and propagation down to bottom.
  const bool anything_to_do = param_propagate_down_[0] ||
      param_propagate_down_[1] || propagate_down[0];
  if (!anything_to_do) {
    return;
  }

  vector<bool> no_propagate_down(1, false);
  vector<bool> yes_propagate_down(1, true);

  // top diff -> deconv2 output diff (divide by the shared deconv1 output).
  for (int n = 0; n < top[0]->num(); ++n) {
    caffe_div(deconv1_top_vec[0]->count(), out_grad + top[0]->offset(n),
        denom_data, numer_grad + deconv2_top_vec[0]->offset(n));
  }

  // Backward through deconv2_layer.
  deconv2_layer->Backward(deconv2_top_vec, propagate_down, bottom);
  const Dtype* wa_grad = weights_alphas->cpu_diff();

  // Weight gradient.
  if (param_propagate_down_[0]) {
    Dtype* weight_grad = this->blobs_[0]->mutable_cpu_diff();
    const Dtype* alpha_data = alphas->cpu_data();
    for (int ch_in = 0; ch_in < weights_alphas->num(); ++ch_in) {
      caffe_mul(alphas->count(), wa_grad + weights_alphas->offset(ch_in),
          alpha_data, weight_grad + this->blobs_[0]->offset(ch_in));
    }
  }

  // Alpha gradient.
  if (param_propagate_down_[1] && average_train) {
    // First part: sum over input channels of (weights_alphas diff * weight).
    Dtype* alpha_term = alpha_cache.mutable_cpu_diff();
    Dtype* alpha_sum = alpha_cache2.mutable_cpu_diff();
    caffe_set(alpha_cache.count(), (Dtype)0, alpha_term);
    caffe_set(alpha_cache2.count(), (Dtype)0, alpha_sum);
    const Dtype* weight_data = this->blobs_[0]->cpu_data();
    for (int ch_in = 0; ch_in < weights_alphas->num(); ++ch_in) {
      caffe_mul(alpha_cache.count(), wa_grad + weights_alphas->offset(ch_in),
          weight_data + this->blobs_[0]->offset(ch_in), alpha_term);
      caffe_add(alpha_cache2.count(), alpha_term, alpha_sum, alpha_sum);
    }

    // top diff -> deconv1 output diff: sum over the batch of
    // top_diff * deconv2_output, then divided twice by the deconv1 output.
    Dtype* denom_term = deconv1_top_cache.mutable_cpu_diff();
    caffe_set(deconv1_top_cache.count(), (Dtype)0, denom_term);
    for (int n = 0; n < top[0]->num(); ++n) {
      caffe_mul(deconv1_top_cache.count(), out_grad + top[0]->offset(n),
          numer_data + deconv2_top_vec[0]->offset(n), denom_term);
      caffe_add(deconv1_top_cache.count(), denom_term, denom_grad,
          denom_grad);
    }
    caffe_div(deconv1_top_cache.count(), denom_grad, denom_data, denom_grad);
    caffe_div(deconv1_top_cache.count(), denom_grad, denom_data, denom_grad);

    // Backward through deconv1_layer (parameters only, no bottom diff).
    deconv1_layer->Backward(deconv1_top_vec, no_propagate_down,
        deconv1_bottom_vec);

    // Second part comes from the alphas diff; fuse: first part - second part.
    Dtype* alpha_grad = alphas->mutable_cpu_diff();
    caffe_sub(alpha_cache.count(), alpha_sum, alpha_grad, alpha_grad);

    exp_layer->Backward(exp_top_vec, yes_propagate_down, exp_bottom_vec);
  }
}
// Computes the AdaGrad update for every network parameter and leaves it in
// that parameter's diff.
//
// For each parameter blob:
//   1. add the weight-decay term to the gradient ("L2": decay * w,
//      "L1": decay * sign(w); any other type aborts);
//   2. history += gradient^2;
//   3. diff = local_rate * gradient / (sqrt(history) + delta).
// The CPU and GPU branches run the same sequence with the matching math
// routines. In CPU_ONLY builds the GPU branch is NO_GPU.
void AdaGradSolver<Dtype>::ComputeUpdateValue() {
  vector<shared_ptr<Blob<Dtype> > >& net_params = this->net_->params();
  vector<float>& net_params_lr = this->net_->params_lr();
  vector<float>& net_params_weight_decay =
      this->net_->params_weight_decay();

  // Global learning rate for this iteration.
  Dtype rate = this->GetLearningRate();
  Dtype delta = this->param_.delta();
  if (this->param_.display() && this->iter_ % this->param_.display() == 0) {
    LOG(INFO) << "Iteration " << this->iter_ << ", lr = " << rate;
  }
  Dtype weight_decay = this->param_.weight_decay();
  string regularization_type = this->param_.regularization_type();

  switch (Caffe::mode()) {
  case Caffe::CPU:
    for (int param_id = 0; param_id < net_params.size(); ++param_id) {
      Blob<Dtype>& param = *net_params[param_id];
      Blob<Dtype>& update = *this->update_[param_id];
      Blob<Dtype>& history = *this->history_[param_id];
      Dtype local_rate = rate * net_params_lr[param_id];
      Dtype local_decay = weight_decay * net_params_weight_decay[param_id];

      if (local_decay) {
        if (regularization_type == "L2") {
          // gradient += decay * w
          caffe_axpy(param.count(), local_decay, param.cpu_data(),
              param.mutable_cpu_diff());
        } else if (regularization_type == "L1") {
          // gradient += decay * sign(w)
          Blob<Dtype>& sign_scratch = *this->temp_[param_id];
          caffe_cpu_sign(param.count(), param.cpu_data(),
              sign_scratch.mutable_cpu_data());
          caffe_axpy(param.count(), local_decay, sign_scratch.cpu_data(),
              param.mutable_cpu_diff());
        } else {
          LOG(FATAL) << "Unknown regularization type: "
                     << regularization_type;
        }
      }

      // update <- gradient^2
      caffe_powx(param.count(), param.cpu_diff(), Dtype(2),
          update.mutable_cpu_data());
      // history += gradient^2
      caffe_add(param.count(), update.cpu_data(), history.cpu_data(),
          history.mutable_cpu_data());
      // update <- gradient / (sqrt(history) + delta)
      caffe_powx(param.count(), history.cpu_data(), Dtype(0.5),
          update.mutable_cpu_data());
      caffe_add_scalar(param.count(), delta, update.mutable_cpu_data());
      caffe_div(param.count(), param.cpu_diff(), update.cpu_data(),
          update.mutable_cpu_data());
      // diff <- local_rate * update
      caffe_cpu_axpby(param.count(), local_rate, update.cpu_data(), Dtype(0),
          param.mutable_cpu_diff());
    }
    break;

  case Caffe::GPU:
#ifndef CPU_ONLY
    for (int param_id = 0; param_id < net_params.size(); ++param_id) {
      Blob<Dtype>& param = *net_params[param_id];
      Blob<Dtype>& update = *this->update_[param_id];
      Blob<Dtype>& history = *this->history_[param_id];
      Dtype local_rate = rate * net_params_lr[param_id];
      Dtype local_decay = weight_decay * net_params_weight_decay[param_id];

      if (local_decay) {
        if (regularization_type == "L2") {
          // gradient += decay * w
          caffe_gpu_axpy(param.count(), local_decay, param.gpu_data(),
              param.mutable_gpu_diff());
        } else if (regularization_type == "L1") {
          // gradient += decay * sign(w)
          Blob<Dtype>& sign_scratch = *this->temp_[param_id];
          caffe_gpu_sign(param.count(), param.gpu_data(),
              sign_scratch.mutable_gpu_data());
          caffe_gpu_axpy(param.count(), local_decay, sign_scratch.gpu_data(),
              param.mutable_gpu_diff());
        } else {
          LOG(FATAL) << "Unknown regularization type: "
                     << regularization_type;
        }
      }

      // update <- gradient^2
      caffe_gpu_powx(param.count(), param.gpu_diff(), Dtype(2),
          update.mutable_gpu_data());
      // history += gradient^2
      caffe_gpu_add(param.count(), update.gpu_data(), history.gpu_data(),
          history.mutable_gpu_data());
      // update <- gradient / (sqrt(history) + delta)
      caffe_gpu_powx(param.count(), history.gpu_data(), Dtype(0.5),
          update.mutable_gpu_data());
      caffe_gpu_add_scalar(param.count(), delta, update.mutable_gpu_data());
      caffe_gpu_div(param.count(), param.gpu_diff(), update.gpu_data(),
          update.mutable_gpu_data());
      // diff <- local_rate * update
      caffe_gpu_axpby(param.count(), local_rate, update.gpu_data(), Dtype(0),
          param.mutable_gpu_diff());
    }
#else
    NO_GPU;
#endif
    break;

  default:
    LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
  }
}
// Computes the AdaGrad update for a single learnable parameter and leaves it
// in that parameter's diff:
//     history += gradient^2
//     diff     = (rate * lr_mult) * gradient / (sqrt(history) + delta)
// Must run on the root solver. The same sequence is issued through the CPU,
// CUDA or Greentea math routines depending on the active mode and device
// backend. In CPU_ONLY builds the GPU branch is NO_GPU.
//
// @param param_id  index into the net's learnable parameters.
// @param rate      global learning rate for this iteration.
void AdaGradSolver<Dtype>::ComputeUpdateValue(uint_tp param_id, Dtype rate) {
  CHECK(Caffe::root_solver());
  const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
  const vector<float>& net_params_lr = this->net_->params_lr();
  Dtype delta = this->param_.delta();
  Dtype local_rate = rate * net_params_lr[param_id];

  switch (Caffe::mode()) {
    case Caffe::CPU: {
      Blob<Dtype>& param = *net_params[param_id];
      Blob<Dtype>& update = *this->update_[param_id];
      Blob<Dtype>& history = *this->history_[param_id];

      // update <- gradient^2
      caffe_powx(param.count(), param.cpu_diff(), Dtype(2),
                 update.mutable_cpu_data());
      // history += gradient^2
      caffe_add(param.count(), update.cpu_data(), history.cpu_data(),
                history.mutable_cpu_data());
      // update <- gradient / (sqrt(history) + delta)
      caffe_powx(param.count(), history.cpu_data(), Dtype(0.5),
                 update.mutable_cpu_data());
      caffe_add_scalar(param.count(), delta, update.mutable_cpu_data());
      caffe_div(param.count(), param.cpu_diff(), update.cpu_data(),
                update.mutable_cpu_data());
      // diff <- local_rate * update
      caffe_cpu_axpby(param.count(), local_rate, update.cpu_data(), Dtype(0),
                      param.mutable_cpu_diff());
      break;
    }
    case Caffe::GPU: {
#ifndef CPU_ONLY
      if (this->device_->backend() == BACKEND_CUDA) {
#ifdef USE_CUDA
        Blob<Dtype>& param = *net_params[param_id];
        Blob<Dtype>& update = *this->update_[param_id];
        Blob<Dtype>& history = *this->history_[param_id];

        // update <- gradient^2
        caffe_gpu_powx(param.count(), param.gpu_diff(), Dtype(2),
                       update.mutable_gpu_data());
        // history += gradient^2
        caffe_gpu_add(param.count(), update.gpu_data(), history.gpu_data(),
                      history.mutable_gpu_data());
        // update <- gradient / (sqrt(history) + delta)
        caffe_gpu_powx(param.count(), history.gpu_data(), Dtype(0.5),
                       update.mutable_gpu_data());
        caffe_gpu_add_scalar(param.count(), delta, update.mutable_gpu_data());
        caffe_gpu_div(param.count(), param.gpu_diff(), update.gpu_data(),
                      update.mutable_gpu_data());
        // diff <- local_rate * update
        caffe_gpu_axpby(param.count(), local_rate, update.gpu_data(),
                        Dtype(0), param.mutable_gpu_diff());
#endif  // USE_CUDA
      } else {
#ifdef USE_GREENTEA
        Blob<Dtype>& param = *net_params[param_id];
        Blob<Dtype>& update = *this->update_[param_id];
        Blob<Dtype>& history = *this->history_[param_id];

        // update <- gradient^2
        greentea_gpu_powx<Dtype>(
            this->device_->id(), param.count(),
            (cl_mem) (param.gpu_diff()), 0, Dtype(2),
            (cl_mem) (update.mutable_gpu_data()), 0);
        // history += gradient^2
        greentea_gpu_add<Dtype>(
            this->device_->id(), param.count(),
            (cl_mem) (update.gpu_data()), 0,
            (cl_mem) (history.gpu_data()), 0,
            (cl_mem) (history.mutable_gpu_data()), 0);
        // update <- gradient / (sqrt(history) + delta)
        greentea_gpu_powx<Dtype>(
            this->device_->id(), param.count(),
            (cl_mem) (history.gpu_data()), 0, Dtype(0.5),
            (cl_mem) (update.mutable_gpu_data()), 0);
        greentea_gpu_add_scalar<Dtype>(
            this->device_->id(), param.count(), delta,
            (cl_mem) (update.mutable_gpu_data()), 0);
        greentea_gpu_div<Dtype>(
            this->device_->id(), param.count(),
            (cl_mem) (param.gpu_diff()), 0,
            (cl_mem) (update.gpu_data()), 0,
            (cl_mem) (update.mutable_gpu_data()), 0);
        // diff <- local_rate * update
        greentea_gpu_axpby<Dtype>(
            this->device_->id(), param.count(), local_rate,
            (cl_mem) (update.gpu_data()), 0, Dtype(0),
            (cl_mem) (param.mutable_gpu_diff()), 0);
#endif  // USE_GREENTEA
      }
#else
      NO_GPU;
#endif
      break;
    }
    default:
      LOG(FATAL)<< "Unknown caffe mode: " << Caffe::mode();
  }
}
// CPU backward pass of the non-local layer, which is built from a chain of
// internal sub-layers. Nothing happens unless propagate_down[0] is set.
//
// Order of operations:
//   1. zero the diffs of the intermediate blobs that are accumulated into;
//   2. run eltwise_layer backward (only when the layer has three tops) and
//      split_layer_2 backward;
//   3. add the channels_ consecutive chunks of the split_2 bottom diff into
//      each sample of split_3_top_vec[0]'s diff;
//   4. add the channels_ consecutive chunks of top[1]'s diff into each sample
//      of the normalize output diff;
//   5. run normalize, split_3, smooth_threshold, euclidean (its top diff is
//      first scaled by 1 / bottom[0]->channels()) and split_1 backward,
//      sharing diffs between neighbouring sub-layers where the code does so;
//   6. fold both im2col buffers back to image layout, then run split_layer_0
//      backward into `bottom`.
void NonLocalLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  // Every sub-layer gets the same two-entry propagate_down vector.
  vector<bool> propagate_down_sub;
  propagate_down_sub.push_back(propagate_down[0]);
  propagate_down_sub.push_back(propagate_down[0]);

  if (!propagate_down[0]) {
    return;
  }

  // 1. Clear the intermediate gradients.
  for (size_t i = 0; i < eltwise_bottom_vec.size(); ++i) {
    caffe_set(eltwise_bottom_vec[i]->count(), (Dtype)0,
        eltwise_bottom_vec[i]->mutable_cpu_diff());
  }
  for (size_t i = 0; i < smooth_bottom_vec.size(); ++i) {
    caffe_set(smooth_bottom_vec[i]->count(), (Dtype)0,
        smooth_bottom_vec[i]->mutable_cpu_diff());
  }
  for (size_t i = 0; i < euclidean_bottom_vec.size(); ++i) {
    caffe_set(euclidean_bottom_vec[i]->count(), (Dtype)0,
        euclidean_bottom_vec[i]->mutable_cpu_diff());
  }
  for (size_t i = 0; i < split_1_bottom_vec.size(); ++i) {
    caffe_set(split_1_bottom_vec[i]->count(), (Dtype)0,
        split_1_bottom_vec[i]->mutable_cpu_diff());
  }
  for (size_t i = 0; i < smooth_top_vec.size(); ++i) {
    caffe_set(smooth_top_vec[i]->count(), (Dtype)0,
        smooth_top_vec[i]->mutable_cpu_diff());
  }
  for (size_t i = 0; i < split_0_top_vec.size(); ++i) {
    caffe_set(split_0_top_vec[i]->count(), (Dtype)0,
        split_0_top_vec[i]->mutable_cpu_diff());
  }
  for (size_t i = 0; i < split_3_top_vec.size(); ++i) {
    caffe_set(split_3_top_vec[i]->count(), (Dtype)0,
        split_3_top_vec[i]->mutable_cpu_diff());
  }
  for (size_t i = 0; i < normalize_top_vec.size(); ++i) {
    caffe_set(normalize_top_vec[i]->count(), (Dtype)0,
        normalize_top_vec[i]->mutable_cpu_diff());
  }

  // 2. Eltwise (only present with three tops) and split 2.
  if (top.size() == 3) {
    eltwise_layer->Backward(eltwise_top_vec, propagate_down_sub,
        eltwise_bottom_vec);
  }
  split_layer_2->Backward(split_2_top_vec, propagate_down_sub,
      split_2_bottom_vec);

  // 3. Sum the channels_ chunks of the split_2 bottom diff into each sample
  //    of the split_3 top diff.
  const int tmp_offset = split_3_top_vec[0]->offset(1);
  const Dtype* split_2_bottom_diff = split_2_bottom_vec[0]->cpu_diff();
  Dtype* split_3_top_diff = split_3_top_vec[0]->mutable_cpu_diff();
  for (int n = 0; n < split_2_bottom_vec[0]->num(); ++n) {
    for (int ch = 0; ch < channels_; ++ch) {
      caffe_add(tmp_offset, split_3_top_diff, split_2_bottom_diff,
          split_3_top_diff);
      split_2_bottom_diff += tmp_offset;
    }
    split_3_top_diff += tmp_offset;
  }

  // 4. Same reduction from top[1]'s diff into the normalize output diff.
  const int norm_offset = normalize_top_vec[0]->offset(1);
  Dtype* normalize_diff = normalize_top_vec[0]->mutable_cpu_diff();
  const Dtype* top_1_diff = top[1]->cpu_diff();
  for (int n = 0; n < normalize_top_vec[0]->num(); ++n) {
    for (int ch = 0; ch < channels_; ++ch) {
      // Fixed: the element count was tmp_offset (copied from the loop above)
      // while both pointers advance in norm_offset steps. The count now
      // matches the stride so the add can never run past a sample.
      caffe_add(norm_offset, normalize_diff, top_1_diff, normalize_diff);
      top_1_diff += norm_offset;
    }
    normalize_diff += norm_offset;
  }

  // 5. Walk back through the remaining sub-layers.
  normalize_layer->Backward(normalize_top_vec, propagate_down_sub,
      normalize_bottom_vec);
  split_3_top_vec[1]->ShareDiff(*normalize_bottom_vec[0]);
  split_layer_3->Backward(split_3_top_vec, propagate_down_sub,
      split_3_bottom_vec);
  smooth_threshold_layer->Backward(smooth_top_vec, propagate_down_sub,
      smooth_bottom_vec);
  caffe_scal(euclidean_top_vec[0]->count(),
      (Dtype)(1.0 / bottom[0]->channels()),
      euclidean_top_vec[0]->mutable_cpu_diff());
  euclidean_layer->Backward(euclidean_top_vec, propagate_down_sub,
      euclidean_bottom_vec);
  split_1_top_vec[1]->ShareDiff(*euclidean_bottom_vec[0]);
  split_layer_1->Backward(split_1_top_vec, propagate_down_sub,
      split_1_bottom_vec);

  // 6. Column buffers back to image layout, then the first split.
  for (int n = 0; n < num_; ++n) {
    col2im_center_cpu(img2col_1_top.cpu_diff() + img2col_1_top.offset(n),
        channels_, height_, width_, kernel_h_, kernel_w_, pad_h_, pad_w_,
        stride_h_, stride_w_,
        split_0_top_vec[1]->mutable_cpu_diff() + split_0_top_vec[1]->offset(n));
    col2im_cpu(img2col_0_top.cpu_diff() + img2col_0_top.offset(n),
        channels_, height_, width_, kernel_h_, kernel_w_, pad_h_, pad_w_,
        stride_h_, stride_w_, 1, 1,
        split_0_top_vec[0]->mutable_cpu_diff() + split_0_top_vec[0]->offset(n));
  }
  split_layer_0->Backward(split_0_top_vec, propagate_down_sub, bottom);
}
// CPU forward pass of the coupled-cluster loss.
//
// The batch is processed as group_num groups of N samples each (bottom[0]:
// features of length feat_len, bottom[1]: labels). For every group:
//   1. the anchor label is the first label that occurs twice in the group
//      (-1 when no label repeats);
//   2. samples carrying the anchor label are positives, all others negatives;
//      the mean of the positive features is stored in pos_center_;
//   3. groups without negatives or with fewer than two positives are skipped;
//   4. the (optionally scaled) squared distance of each sample to the centre
//      is stored in dist_sq_, and the difference vector in diff_;
//   5. negatives with max(0, margin + max_pos_dist - d) > 0 are flagged in
//      neg_backward; positives with max(0, d + margin - min_neg_dist) > 0 are
//      flagged in pos_backward and their hinge values are averaged into the
//      group's loss.
// top[0] receives the mean loss over the groups that were not skipped, or 0
// when every group was skipped.
void CoupledClusterLossLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  pos_ids = std::vector<std::vector<int> >(group_num, std::vector<int>());
  neg_ids = std::vector<std::vector<int> >(group_num, std::vector<int>());
  pos_backward = std::vector<bool>(group_num*N, false);
  neg_backward = std::vector<bool>(group_num*N, false);

  const Dtype* feat_ptr = bottom[0]->cpu_data();
  const Dtype* label_ptr = bottom[1]->cpu_data();
  Dtype* diff_ptr_ = diff_.mutable_cpu_data();
  Dtype loss(0);
  caffe_set(feat_len*group_num, Dtype(0), pos_center_.mutable_cpu_data());
  int cnt = 0;  // number of groups that contributed to the loss

  for (int i = 0; i < group_num; ++i) {  // i -> group index
    const Dtype* group_labels = label_ptr + i * N;

    // Find the anchor label: the first label seen twice in this group.
    std::set<Dtype> labels;
    Dtype anchor_id = -1;
    for (int j = 0; j < N; ++j) {
      const Dtype tmp = group_labels[j];
      if (labels.count(tmp) > 0) {
        anchor_id = tmp;
        break;
      }
      labels.insert(tmp);
    }

    // Split into positives / negatives and sum the positive features.
    Dtype* center = pos_center_.mutable_cpu_data() + feat_len * i;
    for (int j = 0; j < N; ++j) {
      if (group_labels[j] == anchor_id) {
        pos_ids[i].push_back(j);
        caffe_add(feat_len, feat_ptr + feat_len * (i * N + j), center, center);
      } else {
        neg_ids[i].push_back(j);
      }
    }
    // Turn the sum into a mean. Fixed: skipped when the group has no
    // positives, which previously divided by zero and filled the centre with
    // NaN; the centre now stays at zero in that case.
    if (!pos_ids[i].empty()) {
      caffe_cpu_scale(feat_len, Dtype(1) / pos_ids[i].size(), center, center);
    }

    if (neg_ids[i].size() == 0 || pos_ids[i].size() <= 1) continue;

    Dtype pos_mdist = Dtype(0);
    Dtype neg_min_val = -1;  // -1 marks "not set yet"
    Dtype pos_max_val = -1;  // -1 marks "not set yet"

    // Distances to the centre, plus the closest negative / farthest positive.
    for (int j = 0; j < N; ++j) {
      Dtype* sample_diff = diff_ptr_ + feat_len * (i * N + j);
      // f[j] - center
      caffe_sub(feat_len, feat_ptr + feat_len * (i * N + j),
          pos_center_.cpu_data() + feat_len * i, sample_diff);
      if (scale != 1) {
        caffe_cpu_scale(feat_len, scale, sample_diff, sample_diff);
      }
      const Dtype d = caffe_cpu_dot(feat_len, sample_diff, sample_diff);
      if (log_flag) LOG(INFO) << "i " << i << ", j " << j << ", d " << d;
      dist_sq_.mutable_cpu_data()[i*N+j] = d;

      // A sample is negative exactly when its label differs from the anchor
      // label. This replaces a linear search through neg_ids[i] per sample.
      const bool is_negative = (group_labels[j] != anchor_id);
      if (is_negative) {
        if (neg_min_val == -1 || d < neg_min_val) neg_min_val = d;
      } else {
        if (pos_max_val == -1 || d > pos_max_val) pos_max_val = d;
      }
    }

    // Hinge terms and the backward flags.
    for (int j = 0; j < N; ++j) {
      const Dtype d = dist_sq_.cpu_data()[i*N+j];
      if (group_labels[j] != anchor_id) {
        const Dtype mdist = std::max(-d+margin+pos_max_val, Dtype(0));
        if (log_flag) {
          LOG(INFO) << "j=" << j << ", d=" << d << ", pos_max_val="
                    << pos_max_val << ", mdist=" << mdist;
        }
        if (mdist > 0) neg_backward[i*N+j] = true;
      } else {
        const Dtype mdist = std::max(d+margin-neg_min_val, Dtype(0));
        if (log_flag) {
          LOG(INFO) << "j=" << j << ", d=" << d << ", neg_min_val="
                    << neg_min_val << ", mdist=" << mdist;
        }
        if (mdist > 0) pos_backward[i*N+j] = true;
        pos_mdist += mdist;
      }
    }

    // Average the hinge values over the positives.
    pos_mdist /= pos_ids[i].size();
    if (log_flag) {
      LOG(INFO) << "pos_mdist " << pos_mdist << ", neg_min_val "
                << neg_min_val;
    }
    CHECK_GE(pos_ids[i].size(), 2);
    CHECK_GE(neg_ids[i].size(), 1);
    loss += pos_mdist;
    ++cnt;
  }

  // Fixed: with no contributing group the original computed 0 / 0 and
  // reported a NaN loss; report 0 instead.
  loss = (cnt > 0) ? loss / cnt : Dtype(0);
  top[0]->mutable_cpu_data()[0] = loss;
}