void TripletLossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  // Forward pass of a cosine-similarity triplet loss.
  //   bottom[0]: sample features, sample_num_ rows of feature_dim_ values.
  //   bottom[1]: triplet table, triplet_num_ groups of [anchor, pos, neg] indices.
  //   top[0]:    scalar loss.
  // NOTE(review): gradients are accumulated into bottom_diff_ with beta = 1;
  // presumably that buffer is cleared elsewhere each iteration -- confirm.
  const Dtype eps = this->layer_param_.triplet_loss_param().eps();
  const Dtype margin = this->layer_param_.triplet_loss_param().margin();
  const Dtype* feats = bottom[0]->cpu_data();
  const Dtype* triplets = bottom[1]->cpu_data();
  // Gram matrix of all samples: G(i, j) = <f_i, f_j>.
  caffe_cpu_gemm(CblasNoTrans, CblasTrans, sample_num_, sample_num_,
                 feature_dim_, Dtype(1), feats, feats, Dtype(0),
                 inner_matrix_.mutable_cpu_data());
  const Dtype* gram = inner_matrix_.cpu_data();  // loop-invariant, hoisted out of the loop
  Dtype loss = 0;
  for (int t = 0; t < triplet_num_; ++t) {
    const int a_idx = triplets[t * 3];
    const int p_idx = triplets[t * 3 + 1];
    const int n_idx = triplets[t * 3 + 2];
    // eps keeps the norms strictly positive before the square roots.
    const Dtype a_norm = sqrt(gram[a_idx * sample_num_ + a_idx] + eps);
    const Dtype p_norm = sqrt(gram[p_idx * sample_num_ + p_idx] + eps);
    const Dtype n_norm = sqrt(gram[n_idx * sample_num_ + n_idx] + eps);
    const Dtype inner_ap = gram[a_idx * sample_num_ + p_idx];
    const Dtype inner_an = gram[a_idx * sample_num_ + n_idx];
    // Cosine similarities anchor-positive and anchor-negative.
    const Dtype dist_ap = inner_ap / (a_norm * p_norm);
    const Dtype dist_an = inner_an / (a_norm * n_norm);
    // Only triplets that violate the margin contribute loss and gradient.
    if (dist_ap - dist_an - margin < 0) {
      const Dtype* a_feat = feats + a_idx * feature_dim_;
      const Dtype* p_feat = feats + p_idx * feature_dim_;
      const Dtype* n_feat = feats + n_idx * feature_dim_;
      ComputeDiff_cpu(a_feat, p_feat, a_norm, p_norm, inner_ap, diff_ap_.mutable_cpu_data());
      ComputeDiff_cpu(a_feat, n_feat, a_norm, n_norm, inner_an, diff_an_.mutable_cpu_data());
      ComputeDiff_cpu(p_feat, a_feat, p_norm, a_norm, inner_ap, diff_pa_.mutable_cpu_data());
      ComputeDiff_cpu(n_feat, a_feat, n_norm, a_norm, inner_an, diff_na_.mutable_cpu_data());
      // Accumulate signed partial gradients into the rows of the three samples.
      caffe_cpu_axpby(feature_dim_, Dtype(1), diff_an_.cpu_data(), Dtype(1),
                      bottom_diff_.mutable_cpu_data() + a_idx * feature_dim_);
      caffe_cpu_axpby(feature_dim_, Dtype(-1), diff_ap_.cpu_data(), Dtype(1),
                      bottom_diff_.mutable_cpu_data() + a_idx * feature_dim_);
      caffe_cpu_axpby(feature_dim_, Dtype(-1), diff_pa_.cpu_data(), Dtype(1),
                      bottom_diff_.mutable_cpu_data() + p_idx * feature_dim_);
      caffe_cpu_axpby(feature_dim_, Dtype(1), diff_na_.cpu_data(), Dtype(1),
                      bottom_diff_.mutable_cpu_data() + n_idx * feature_dim_);
      loss += dist_an + margin - dist_ap;
    }
  }
  // Normalized by sample count (a previous revision normalized by triplet count).
  const Dtype scalar = Dtype(1) / sample_num_;
  top[0]->mutable_cpu_data()[0] = loss * scalar;
}
void BatchTripletLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  // Gradient w.r.t. the feature input only; the label input is not differentiable.
  if (propagate_down[1]) {
    LOG(FATAL) << this->type() << " Layer cannot backpropagate to label inputs.";
  }
  if (!propagate_down[0]) {
    return;
  }
  Blob<Dtype>* feat = bottom[0];
  const Dtype* feat_data = feat->cpu_data();
  Dtype* feat_diff = feat->mutable_cpu_diff();
  const int count = feat->count();
  const int num = feat->num();
  const int dim = count / num;
  // Aggregation matrix A (num x num): feat_diff = A * feat_data, i.e. each
  // row of A holds the linear-combination coefficients for one sample's diff.
  const int agg_step = num * sizeof(Dtype);
  Dtype* agg_data = (Dtype*)aggregator_->mutable_cpu_data();
  caffe_memset(num * agg_step, 0, agg_data);
  // Triplet (ranking) term, weighted by mu_.
  const Dtype tri_scale = Dtype(2) / triplets_.size() * mu_;
  for (size_t k = 0; k < triplets_.size(); ++k) {
    const int qry = triplets_[k].first_;
    const int pos = triplets_[k].second_;
    const int neg = triplets_[k].third_;
    agg_data[qry * num + neg] += tri_scale;
    agg_data[qry * num + pos] -= tri_scale;
    agg_data[pos * num + pos] += tri_scale;
    agg_data[pos * num + qry] -= tri_scale;
    agg_data[neg * num + qry] += tri_scale;
    agg_data[neg * num + neg] -= tri_scale;
  }
  // Positive-pair pulling term, weighted by (1 - mu_).
  const Dtype pair_scale = Dtype(2) / pos_pairs_.size() * (Dtype(1) - mu_);
  for (size_t k = 0; k < pos_pairs_.size(); ++k) {
    const int qry = pos_pairs_[k].first;
    const int pos = pos_pairs_[k].second;
    agg_data[qry * num + qry] += pair_scale;
    agg_data[qry * num + pos] -= pair_scale;
    agg_data[pos * num + pos] += pair_scale;
    agg_data[pos * num + qry] -= pair_scale;
  }
  // One dense multiply applies every coefficient at once.
  caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, num, dim, num, Dtype(1),
                 agg_data, feat_data, Dtype(0), feat_diff);
}
void BiasLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  // Adds a bias -- either the second bottom blob or the learned parameter --
  // along the bias axis.  Supports in-place operation (bottom[0] == top[0]).
  Blob<Dtype>* bias_blob = (bottom.size() > 1) ? bottom[1] : this->blobs_[0].get();
  const Dtype* bias_data = bias_blob->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  // When not running in place, start from a copy of the input.
  if (bottom[0] != top[0]) {
    caffe_copy(bottom[0]->count(), bottom[0]->cpu_data(), top_data);
  }
  // For each outer slice, add the bias as a rank-1 update:
  // top += bias (bias_dim_ x 1) * multiplier (1 x inner_dim_).
  for (int n = 0; n < outer_dim_; ++n) {
    caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, bias_dim_, inner_dim_, 1,
                   Dtype(1), bias_data, bias_multiplier_.cpu_data(), Dtype(1),
                   top_data);
    top_data += dim_;
  }
}
void CRFWithLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[1]) { LOG(FATAL) << this->type() << " Layer cannot backpropagate to label inputs."; }
  if (propagate_down[0]) {
    // Backward flow is split into 2 ways: one to the local parameters
    // (pi / transition / mu), the other to the lower layer through the
    // diff blob of bottom[0].
    Dtype* ptr_pi_diff = this->blobs_[0]->mutable_cpu_diff();
    Dtype* ptr_tr_diff = this->blobs_[1]->mutable_cpu_diff();
    Dtype* ptr_mu_diff = this->blobs_[2]->mutable_cpu_diff();
    Dtype* ptr_bottom_diff = bottom[0]->mutable_cpu_diff();
    Dtype* ptr_state_err = gamma_.mutable_cpu_data();
    Dtype* ptr_trans_err = epsilon_.mutable_cpu_data();
    // Read-only views of the error buffers and inputs.
    const Dtype* state_err = gamma_.cpu_data();
    const Dtype* trans_err = epsilon_.cpu_data();
    const Dtype* feature_table = bottom[0]->cpu_data();
    const Dtype* label = bottom[1]->cpu_data();
    const Dtype* mu = this->blobs_[2]->cpu_data();
    const Dtype* pi_diff = this->blobs_[0]->cpu_diff();
    // Per-instance strides.
    int ts = max_seq_length_ * feature_num_;              // feature table / bottom diff
    int gs = max_seq_length_ * state_num_;                // state (unary) error
    int eps = max_seq_length_ * state_num_ * state_num_;  // transition error
    for (int i = 0; i < num_; ++i) {
      // Seq length of each instance should be different; needs to be
      // reconsidered here (currently every instance uses max_seq_length_).
      int sl = max_seq_length_;
      // Compute the state-energy err and state-transition err at each
      // position of each instance by adding the observed (one-hot) counts
      // at the labelled state / transition.
      for (int j = 0; j < sl; ++j) {
        int idx = *(label + i * max_seq_length_ + j);
        if (idx >= 0 && idx < state_num_) {
          *(ptr_state_err + i * gs + j * state_num_ + idx) += 1;
        } else {
          // TODO: decide how to handle out-of-range labels.
        }
        if ( j >= sl - 1 ) continue;
        int idx_next = *(label + i * max_seq_length_ + j + 1);
        if (idx >= 0 && idx < state_num_ && idx_next >= 0 && idx_next < state_num_) {
          // FIX: per-instance stride into the transition buffer is eps
          // (sl * state_num_^2), matching the read below; it was i * gs,
          // which corrupted the layout whenever state_num_ > 1.
          *(ptr_trans_err + i * eps + j * state_num_ * state_num_ + idx * state_num_ + idx_next) += 1;
        } else {
          // TODO
        }
      }
      // Backward to input blob: bottom_diff = Mu' dot state_err'.
      // NOTE(review): assumes bottom[0] is laid out feature-major per
      // instance (feature_num_ x sl) -- consistent with the gemms here,
      // but confirm against the Forward pass.
      caffe_cpu_gemm(CblasTrans, CblasTrans, feature_num_, sl, state_num_, (Dtype)1., mu, state_err + i * gs, (Dtype)0., ptr_bottom_diff + i * ts);
      // Backward to pi: pi_diff += state_err(0).
      caffe_add(state_num_, pi_diff, state_err + i * gs, ptr_pi_diff);
      // Backward to mu: mu_diff += state_err' dot features'.
      // FIX: the feature table advances by ts per instance (the same stride
      // used for ptr_bottom_diff above); it was read with the state stride
      // gs, which is only correct when feature_num_ == state_num_.
      caffe_cpu_gemm(CblasTrans, CblasTrans, state_num_, feature_num_, sl, (Dtype)1., state_err + i * gs, feature_table + i * ts, (Dtype)1., ptr_mu_diff);
      // Backward to tr: sum_t(state_trans_err(t)).
      // FIX 1: trans_err is stored position-major (sl rows of state_num_^2
      // values, as written above), so summing over positions is A^T * ones:
      // CblasTrans with A viewed as sl x (state_num_^2).  The old NoTrans
      // call read the buffer with transposed strides.
      // FIX 2: accumulate with beta = 1 across instances, consistent with
      // the mu gemm (beta = 1) and the pi caffe_add; beta = 0 inside the
      // loop kept only the last instance's gradient.  NOTE(review): this
      // assumes param diffs are zeroed before Backward, as mu already requires.
      caffe_cpu_gemv(CblasTrans, sl, state_num_ * state_num_, (Dtype)1., trans_err + i * eps, multiplier_seq_len_.cpu_data(), (Dtype)1., ptr_tr_diff);
    }
  }
}