void TripletLossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  // Forward pass of a cosine-similarity triplet loss.
  //   bottom[0]: sample features, sample_num_ rows of feature_dim_ values.
  //   bottom[1]: triplet table, triplet_num_ groups of [anchor, pos, neg] indices.
  //   top[0]:    scalar loss.
  // NOTE(review): gradients are accumulated into bottom_diff_ with beta = 1;
  // presumably that buffer is cleared elsewhere each iteration -- confirm.
  const Dtype eps = this->layer_param_.triplet_loss_param().eps();
  const Dtype margin = this->layer_param_.triplet_loss_param().margin();
  const Dtype* feats = bottom[0]->cpu_data();
  const Dtype* triplets = bottom[1]->cpu_data();
  // Gram matrix of all samples: G(i, j) = <f_i, f_j>.
  caffe_cpu_gemm(CblasNoTrans, CblasTrans, sample_num_, sample_num_,
                 feature_dim_, Dtype(1), feats, feats, Dtype(0),
                 inner_matrix_.mutable_cpu_data());
  const Dtype* gram = inner_matrix_.cpu_data();  // loop-invariant, hoisted out of the loop
  Dtype loss = 0;
  for (int t = 0; t < triplet_num_; ++t) {
    const int a_idx = triplets[t * 3];
    const int p_idx = triplets[t * 3 + 1];
    const int n_idx = triplets[t * 3 + 2];
    // eps keeps the norms strictly positive before the square roots.
    const Dtype a_norm = sqrt(gram[a_idx * sample_num_ + a_idx] + eps);
    const Dtype p_norm = sqrt(gram[p_idx * sample_num_ + p_idx] + eps);
    const Dtype n_norm = sqrt(gram[n_idx * sample_num_ + n_idx] + eps);
    const Dtype inner_ap = gram[a_idx * sample_num_ + p_idx];
    const Dtype inner_an = gram[a_idx * sample_num_ + n_idx];
    // Cosine similarities anchor-positive and anchor-negative.
    const Dtype dist_ap = inner_ap / (a_norm * p_norm);
    const Dtype dist_an = inner_an / (a_norm * n_norm);
    // Only triplets that violate the margin contribute loss and gradient.
    if (dist_ap - dist_an - margin < 0) {
      const Dtype* a_feat = feats + a_idx * feature_dim_;
      const Dtype* p_feat = feats + p_idx * feature_dim_;
      const Dtype* n_feat = feats + n_idx * feature_dim_;
      ComputeDiff_cpu(a_feat, p_feat, a_norm, p_norm, inner_ap, diff_ap_.mutable_cpu_data());
      ComputeDiff_cpu(a_feat, n_feat, a_norm, n_norm, inner_an, diff_an_.mutable_cpu_data());
      ComputeDiff_cpu(p_feat, a_feat, p_norm, a_norm, inner_ap, diff_pa_.mutable_cpu_data());
      ComputeDiff_cpu(n_feat, a_feat, n_norm, a_norm, inner_an, diff_na_.mutable_cpu_data());
      // Accumulate signed partial gradients into the rows of the three samples.
      caffe_cpu_axpby(feature_dim_, Dtype(1), diff_an_.cpu_data(), Dtype(1),
                      bottom_diff_.mutable_cpu_data() + a_idx * feature_dim_);
      caffe_cpu_axpby(feature_dim_, Dtype(-1), diff_ap_.cpu_data(), Dtype(1),
                      bottom_diff_.mutable_cpu_data() + a_idx * feature_dim_);
      caffe_cpu_axpby(feature_dim_, Dtype(-1), diff_pa_.cpu_data(), Dtype(1),
                      bottom_diff_.mutable_cpu_data() + p_idx * feature_dim_);
      caffe_cpu_axpby(feature_dim_, Dtype(1), diff_na_.cpu_data(), Dtype(1),
                      bottom_diff_.mutable_cpu_data() + n_idx * feature_dim_);
      loss += dist_an + margin - dist_ap;
    }
  }
  // Normalized by sample count (a previous revision normalized by triplet count).
  const Dtype scalar = Dtype(1) / sample_num_;
  top[0]->mutable_cpu_data()[0] = loss * scalar;
}
void BatchTripletLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  // Gradient w.r.t. the feature input only; the label input is not differentiable.
  if (propagate_down[1]) {
    LOG(FATAL) << this->type() << " Layer cannot backpropagate to label inputs.";
  }
  if (!propagate_down[0]) {
    return;
  }
  Blob<Dtype>* feat = bottom[0];
  const Dtype* feat_data = feat->cpu_data();
  Dtype* feat_diff = feat->mutable_cpu_diff();
  const int count = feat->count();
  const int num = feat->num();
  const int dim = count / num;
  // Aggregation matrix A (num x num): feat_diff = A * feat_data, i.e. each
  // row of A holds the linear-combination coefficients for one sample's diff.
  const int agg_step = num * sizeof(Dtype);
  Dtype* agg_data = (Dtype*)aggregator_->mutable_cpu_data();
  caffe_memset(num * agg_step, 0, agg_data);
  // Triplet (ranking) term, weighted by mu_.
  const Dtype tri_scale = Dtype(2) / triplets_.size() * mu_;
  for (size_t k = 0; k < triplets_.size(); ++k) {
    const int qry = triplets_[k].first_;
    const int pos = triplets_[k].second_;
    const int neg = triplets_[k].third_;
    agg_data[qry * num + neg] += tri_scale;
    agg_data[qry * num + pos] -= tri_scale;
    agg_data[pos * num + pos] += tri_scale;
    agg_data[pos * num + qry] -= tri_scale;
    agg_data[neg * num + qry] += tri_scale;
    agg_data[neg * num + neg] -= tri_scale;
  }
  // Positive-pair pulling term, weighted by (1 - mu_).
  const Dtype pair_scale = Dtype(2) / pos_pairs_.size() * (Dtype(1) - mu_);
  for (size_t k = 0; k < pos_pairs_.size(); ++k) {
    const int qry = pos_pairs_[k].first;
    const int pos = pos_pairs_[k].second;
    agg_data[qry * num + qry] += pair_scale;
    agg_data[qry * num + pos] -= pair_scale;
    agg_data[pos * num + pos] += pair_scale;
    agg_data[pos * num + qry] -= pair_scale;
  }
  // One dense multiply applies every coefficient at once.
  caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, num, dim, num, Dtype(1),
                 agg_data, feat_data, Dtype(0), feat_diff);
}
void BiasLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  // Adds a bias -- either the second bottom blob or the learned parameter --
  // along the bias axis.  Supports in-place operation (bottom[0] == top[0]).
  Blob<Dtype>* bias_blob = (bottom.size() > 1) ? bottom[1] : this->blobs_[0].get();
  const Dtype* bias_data = bias_blob->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  // When not running in place, start from a copy of the input.
  if (bottom[0] != top[0]) {
    caffe_copy(bottom[0]->count(), bottom[0]->cpu_data(), top_data);
  }
  // For each outer slice, add the bias as a rank-1 update:
  // top += bias (bias_dim_ x 1) * multiplier (1 x inner_dim_).
  for (int n = 0; n < outer_dim_; ++n) {
    caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, bias_dim_, inner_dim_, 1,
                   Dtype(1), bias_data, bias_multiplier_.cpu_data(), Dtype(1),
                   top_data);
    top_data += dim_;
  }
}
void CRFWithLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[1]) { LOG(FATAL) << this->type() << " Layer cannot backpropagate to label inputs."; }
  if (propagate_down[0]) {
    // Backward flow is split into 2 ways: one to the local parameters
    // (pi / transition / mu), the other to the lower layer through the
    // diff blob of bottom[0].
    Dtype* ptr_pi_diff = this->blobs_[0]->mutable_cpu_diff();
    Dtype* ptr_tr_diff = this->blobs_[1]->mutable_cpu_diff();
    Dtype* ptr_mu_diff = this->blobs_[2]->mutable_cpu_diff();
    Dtype* ptr_bottom_diff = bottom[0]->mutable_cpu_diff();
    Dtype* ptr_state_err = gamma_.mutable_cpu_data();
    Dtype* ptr_trans_err = epsilon_.mutable_cpu_data();
    // Read-only views of the error buffers and inputs.
    const Dtype* state_err = gamma_.cpu_data();
    const Dtype* trans_err = epsilon_.cpu_data();
    const Dtype* feature_table = bottom[0]->cpu_data();
    const Dtype* label = bottom[1]->cpu_data();
    const Dtype* mu = this->blobs_[2]->cpu_data();
    const Dtype* pi_diff = this->blobs_[0]->cpu_diff();
    // Per-instance strides.
    int ts = max_seq_length_ * feature_num_;              // feature table / bottom diff
    int gs = max_seq_length_ * state_num_;                // state (unary) error
    int eps = max_seq_length_ * state_num_ * state_num_;  // transition error
    for (int i = 0; i < num_; ++i) {
      // Seq length of each instance should be different; needs to be
      // reconsidered here (currently every instance uses max_seq_length_).
      int sl = max_seq_length_;
      // Compute the state-energy err and state-transition err at each
      // position of each instance by adding the observed (one-hot) counts
      // at the labelled state / transition.
      for (int j = 0; j < sl; ++j) {
        int idx = *(label + i * max_seq_length_ + j);
        if (idx >= 0 && idx < state_num_) {
          *(ptr_state_err + i * gs + j * state_num_ + idx) += 1;
        } else {
          // TODO: decide how to handle out-of-range labels.
        }
        if ( j >= sl - 1 ) continue;
        int idx_next = *(label + i * max_seq_length_ + j + 1);
        if (idx >= 0 && idx < state_num_ && idx_next >= 0 && idx_next < state_num_) {
          // FIX: per-instance stride into the transition buffer is eps
          // (sl * state_num_^2), matching the read below; it was i * gs,
          // which corrupted the layout whenever state_num_ > 1.
          *(ptr_trans_err + i * eps + j * state_num_ * state_num_ + idx * state_num_ + idx_next) += 1;
        } else {
          // TODO
        }
      }
      // Backward to input blob: bottom_diff = Mu' dot state_err'.
      // NOTE(review): assumes bottom[0] is laid out feature-major per
      // instance (feature_num_ x sl) -- consistent with the gemms here,
      // but confirm against the Forward pass.
      caffe_cpu_gemm(CblasTrans, CblasTrans, feature_num_, sl, state_num_, (Dtype)1., mu, state_err + i * gs, (Dtype)0., ptr_bottom_diff + i * ts);
      // Backward to pi: pi_diff += state_err(0).
      caffe_add(state_num_, pi_diff, state_err + i * gs, ptr_pi_diff);
      // Backward to mu: mu_diff += state_err' dot features'.
      // FIX: the feature table advances by ts per instance (the same stride
      // used for ptr_bottom_diff above); it was read with the state stride
      // gs, which is only correct when feature_num_ == state_num_.
      caffe_cpu_gemm(CblasTrans, CblasTrans, state_num_, feature_num_, sl, (Dtype)1., state_err + i * gs, feature_table + i * ts, (Dtype)1., ptr_mu_diff);
      // Backward to tr: sum_t(state_trans_err(t)).
      // FIX 1: trans_err is stored position-major (sl rows of state_num_^2
      // values, as written above), so summing over positions is A^T * ones:
      // CblasTrans with A viewed as sl x (state_num_^2).  The old NoTrans
      // call read the buffer with transposed strides.
      // FIX 2: accumulate with beta = 1 across instances, consistent with
      // the mu gemm (beta = 1) and the pi caffe_add; beta = 0 inside the
      // loop kept only the last instance's gradient.  NOTE(review): this
      // assumes param diffs are zeroed before Backward, as mu already requires.
      caffe_cpu_gemv(CblasTrans, sl, state_num_ * state_num_, (Dtype)1., trans_err + i * eps, multiplier_seq_len_.cpu_data(), (Dtype)1., ptr_tr_diff);
    }
  }
}