Dtype Blob<Dtype>::asum_diff() const {
  if (!diff_) { return 0; }
  switch (diff_->head()) {
  case SyncedMemory::SYNCED_PRV:
  case SyncedMemory::HEAD_AT_PRV:
    return caffe_cpu_asum(prv_diff_count(), prv_diff());
  case SyncedMemory::HEAD_AT_CPU:
    return caffe_cpu_asum(count_, cpu_diff());
  case SyncedMemory::HEAD_AT_GPU:
  case SyncedMemory::SYNCED:
#ifndef CPU_ONLY
  {
    Dtype asum;
    caffe_gpu_asum(count_, gpu_diff(), &asum);
    return asum;
  }
#else
    NO_GPU;
#endif
  case SyncedMemory::UNINITIALIZED:
    return 0;
  default:
    LOG(FATAL) << "Unknown SyncedMemory head state: " << diff_->head();
  }
  return 0;
}
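// Hedged usage sketch, not part of the snippets in this collection: asum_diff()
// is the kind of reduction typically consumed by debug logging, e.g. reporting
// the mean absolute gradient of a blob. The helper name below is hypothetical;
// it assumes only the Blob::count() and Blob::asum_diff() members shown above.
template <typename Dtype>
void LogMeanAbsDiff(const Blob<Dtype>& blob) {
  const Dtype mean_abs_diff = blob.count() > 0 ?
      blob.asum_diff() / blob.count() : Dtype(0);
  LOG(INFO) << "mean |diff| = " << mean_abs_diff;
}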
void TripletRankingHingeLossLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  int dim_v = batch_ * dim_;
  const Dtype* sub_or_si;
  const Dtype* sub_or_di;
  Dtype b = 2;
  Dtype Tripletlosstotal(0.0);
  // The triplet ranking loss
  caffe_sub(dim_v, bottom[0]->cpu_data(), bottom[1]->cpu_data(),
      diff_sub_or_si.mutable_cpu_data());  // F - F+
  caffe_sub(dim_v, bottom[0]->cpu_data(), bottom[2]->cpu_data(),
      diff_sub_or_di.mutable_cpu_data());  // F - F-
  caffe_powx(dim_v, diff_sub_or_si.cpu_data(), Dtype(2.0),
      diff_pow_or_si.mutable_cpu_data());  // Pow
  caffe_powx(dim_v, diff_sub_or_di.cpu_data(), Dtype(2.0),
      diff_pow_or_di.mutable_cpu_data());  // Pow
  for (int n = 0; n < batch_; n++) {
    sub_or_si = diff_pow_or_si.cpu_data() + diff_pow_or_si.offset(n);
    sub_or_di = diff_pow_or_di.cpu_data() + diff_pow_or_di.offset(n);
    Dtype result1 = 0;
    Dtype result2 = 0;
    result1 = caffe_cpu_asum(dim_, sub_or_si);
    result2 = caffe_cpu_asum(dim_, sub_or_di);
    Dtype loss(0.0);
    loss = std::max(margin + result1 - result2, Dtype(0));  // compute the loss
    diff_.mutable_cpu_data()[n] = loss;  // save the loss[i]
  }
  for (int k = 0; k < batch_; k++) {
    dist_sq_.mutable_cpu_data()[k] = diff_.cpu_data()[k];  // save the loss[i] for BP
    Tripletlosstotal += dist_sq_.cpu_data()[k];
  }
  // get the average loss
  Tripletlosstotal = Tripletlosstotal / static_cast<Dtype>(bottom[0]->num());
  top[0]->mutable_cpu_data()[0] = Tripletlosstotal;
}
Dtype TripletClipHingeLossLayer<Dtype>::compute_structureloss(
    const vector<Blob<Dtype>*>& bottom) {
  Dtype Structureloss(0.0);
  int batch_size = bottom[0]->num() / frame_num;
  for (int i = 0; i < batch_size; ++i) {
    for (int j = 0; j < frame_num - 1; ++j) {
      int index_1 = i * frame_num * dim + j * dim;
      int index_2 = i * frame_num * dim + (j + 1) * dim;
      int direct = i * (frame_num - 1) * dim + j * dim;
      caffe_sub(dim, bottom[0]->cpu_data() + index_1,
          bottom[0]->cpu_data() + index_2, sub_or.mutable_cpu_data() + direct);
      caffe_sub(dim, bottom[1]->cpu_data() + index_1,
          bottom[1]->cpu_data() + index_2, sub_si.mutable_cpu_data() + direct);
      caffe_sub(dim, bottom[2]->cpu_data() + index_1,
          bottom[2]->cpu_data() + index_2, sub_di.mutable_cpu_data() + direct);
      // pow
      caffe_powx(dim, sub_or.cpu_data() + direct, Dtype(2.0),
          pow_sub_or.mutable_cpu_data() + direct);
      caffe_powx(dim, sub_si.cpu_data() + direct, Dtype(2.0),
          pow_sub_si.mutable_cpu_data() + direct);
      caffe_powx(dim, sub_di.cpu_data() + direct, Dtype(2.0),
          pow_sub_di.mutable_cpu_data() + direct);
      // plus
      Structureloss += (caffe_cpu_asum(dim, pow_sub_or.cpu_data() + direct) +
          caffe_cpu_asum(dim, pow_sub_si.cpu_data() + direct) +
          caffe_cpu_asum(dim, pow_sub_di.cpu_data() + direct));
    }
  }
  return Structureloss / (batch_size * (frame_num - 1) * 3);
}
Dtype TripletClipHingeLossLayer<Dtype>::compute_tripletloss(int batchsize,
    int Dimv) {
  Dtype Tripletlosstotal(0.0);
  const Dtype* sub_or_si;
  const Dtype* sub_or_di;
  // The triplet ranking loss
  caffe_sub(Dimv, ave_or.cpu_data(), ave_si.cpu_data(),
      diff_sub_or_si.mutable_cpu_data());  // F - F+
  caffe_sub(Dimv, ave_or.cpu_data(), ave_di.cpu_data(),
      diff_sub_or_di.mutable_cpu_data());  // F - F-
  caffe_powx(Dimv, diff_sub_or_si.cpu_data(), Dtype(2.0),
      diff_pow_or_si.mutable_cpu_data());  // Pow
  caffe_powx(Dimv, diff_sub_or_di.cpu_data(), Dtype(2.0),
      diff_pow_or_di.mutable_cpu_data());  // Pow
  for (int n = 0; n < batchsize; n++) {
    sub_or_si = diff_pow_or_si.cpu_data() + diff_pow_or_si.offset(n);
    sub_or_di = diff_pow_or_di.cpu_data() + diff_pow_or_di.offset(n);
    Dtype result1 = 0;
    Dtype result2 = 0;
    result1 = caffe_cpu_asum(dim, sub_or_si);
    result2 = caffe_cpu_asum(dim, sub_or_di);
    Dtype loss(0.0);
    loss = std::max(margin + result1 - result2, Dtype(FLT_MIN));  // compute the loss
    diff_.mutable_cpu_data()[n] = loss;  // save the loss[i]
  }
  for (int k = 0; k < batchsize; k++) {
    dist_sq_.mutable_cpu_data()[k] = diff_.cpu_data()[k];  // save the loss[i] for BP
    Tripletlosstotal += dist_sq_.cpu_data()[k];
  }
  return Tripletlosstotal / static_cast<Dtype>(batchsize);
}
void HingeLossLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
  const Dtype* label = bottom[1]->cpu_data();
  int num = bottom[0]->num();
  int count = bottom[0]->count();
  int dim = count / num;
  caffe_copy(count, bottom_data, bottom_diff);
  for (int i = 0; i < num; ++i) {
    bottom_diff[i * dim + static_cast<int>(label[i])] *= -1;
  }
  for (int i = 0; i < num; ++i) {
    for (int j = 0; j < dim; ++j) {
      bottom_diff[i * dim + j] = std::max(
          Dtype(0), 1 + bottom_diff[i * dim + j]);
    }
  }
  Dtype* loss = top[0]->mutable_cpu_data();
  switch (this->layer_param_.hinge_loss_param().norm()) {
  case HingeLossParameter_Norm_L1:
    loss[0] = caffe_cpu_asum(count, bottom_diff) / num;
    break;
  case HingeLossParameter_Norm_L2:
    loss[0] = caffe_cpu_dot(count, bottom_diff, bottom_diff) / num;
    break;
  default:
    LOG(FATAL) << "Unknown Norm";
  }
}
Dtype Blob<Dtype>::asum_diff() const {
  if (!diff_) { return 0; }
  switch (diff_->head()) {
  case SyncedMemory::HEAD_AT_CPU:
    return caffe_cpu_asum(count_, cpu_diff());
  case SyncedMemory::HEAD_AT_GPU:
  case SyncedMemory::SYNCED: {
#ifndef CPU_ONLY
    if (device_->backend() == Backend::BACKEND_CUDA) {
#ifdef USE_CUDA
      Dtype asum;
      caffe_gpu_asum(count_, gpu_diff(), &asum);
      return asum;
#endif
    } else {
#ifdef USE_GREENTEA
      Dtype asum;
      greentea_gpu_asum(device_->id(), count_, (cl_mem) gpu_diff(), 0, &asum);
      return asum;
#endif
    }
#else
    NO_GPU;
#endif
  }
  case SyncedMemory::UNINITIALIZED:
    return 0;
  default:
    LOG(FATAL) << "Unknown SyncedMemory head state: " << diff_->head();
  }
  return 0;
}
void ReductionLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  const Dtype* mult_data = NULL;
  if (sum_multiplier_.count() > 0) {
    mult_data = sum_multiplier_.cpu_data();
  }
  Dtype* top_data = top[0]->mutable_cpu_data();
  for (int i = 0; i < num_; ++i) {
    switch (op_) {
    case ReductionParameter_ReductionOp_SUM:
    case ReductionParameter_ReductionOp_MEAN:
      *top_data = caffe_cpu_dot(dim_, mult_data, bottom_data);
      break;
    case ReductionParameter_ReductionOp_ASUM:
      *top_data = caffe_cpu_asum(dim_, bottom_data);
      break;
    case ReductionParameter_ReductionOp_SUMSQ:
      *top_data = caffe_cpu_dot(dim_, bottom_data, bottom_data);
      break;
    default:
      LOG(FATAL) << "Unknown reduction op: "
          << ReductionParameter_ReductionOp_Name(op_);
    }
    bottom_data += dim_;
    ++top_data;
  }
  if (coeff_ != Dtype(1)) {
    // Reset the top_data pointer.
    top_data = top[0]->mutable_cpu_data();
    caffe_scal(num_, coeff_, top_data);
  }
}
void MyAccuracyLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  Dtype RMSE_lin = 0;
  int count = bottom[0]->count();
  // weighting
  caffe_mul(count, bottom[0]->cpu_data(), bottom[2]->cpu_data(),
      bottom[0]->mutable_cpu_data());
  caffe_mul(count, bottom[1]->cpu_data(), bottom[2]->cpu_data(),
      bottom[1]->mutable_cpu_data());
  // rescaling
  caffe_exp(count, bottom[0]->cpu_data(), bottom[0]->mutable_cpu_data());
  caffe_exp(count, bottom[1]->cpu_data(), bottom[1]->mutable_cpu_data());
  // diff
  caffe_sub(count, bottom[0]->cpu_data(), bottom[1]->cpu_data(),
      diff_.mutable_cpu_data());
  // sum(diff^2)
  Dtype ss = caffe_cpu_dot(count, diff_.cpu_data(), diff_.cpu_data());
  // n
  Dtype n = caffe_cpu_asum(count, bottom[2]->cpu_data());
  n += std::numeric_limits<Dtype>::min();
  // sqrt(ss/n)
  RMSE_lin = sqrt(ss / n);
  top[0]->mutable_cpu_data()[0] = RMSE_lin;
}
void L1LossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    vector<Blob<Dtype>*>* top) {
  int count = bottom[0]->count();
  caffe_sub(count, bottom[0]->cpu_data(), bottom[1]->cpu_data(),
      diff_.mutable_cpu_data());
  Dtype asum = caffe_cpu_asum(count, diff_.cpu_data());
  Dtype loss = asum / count;
  (*top)[0]->mutable_cpu_data()[0] = loss;
}
Dtype Blob<Dtype>::asum_diff() const {
  if (!diff_) { return 0; }
  switch (diff_->head()) {
  case SyncedMemory::HEAD_AT_CPU:
    return caffe_cpu_asum(count_, cpu_diff());
  case SyncedMemory::UNINITIALIZED:
    return 0;
  default:
    LOG(FATAL) << "Unknown SyncedMemory head state: " << diff_->head();
  }
  return 0;
}
void SmoothL1LossOHEMLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  int count = bottom[0]->count();
  caffe_sub(count, bottom[0]->cpu_data(), bottom[1]->cpu_data(),
      diff_.mutable_cpu_data());  // d := b0 - b1
  if (has_weights_) {
    caffe_mul(count, bottom[2]->cpu_data(), diff_.cpu_data(),
        diff_.mutable_cpu_data());  // d := w * (b0 - b1)
  }
#ifdef _OPENMP
#pragma omp parallel for
#endif
  for (int index = 0; index < count; index++) {
    Dtype val = diff_.cpu_data()[index];
    // std::abs keeps the floating-point overload; plain abs may truncate to int
    Dtype abs_val = std::abs(val);
    if (abs_val < 1) {
      errors_.mutable_cpu_data()[index] = 0.5 * val * val;
    } else {
      errors_.mutable_cpu_data()[index] = abs_val - 0.5;
    }
  }
  Dtype loss = caffe_cpu_asum(count, errors_.cpu_data());
  Dtype pre_fixed_normalizer =
      this->layer_param_.loss_param().pre_fixed_normalizer();
  top[0]->mutable_cpu_data()[0] =
      loss / get_normalizer(normalization_, pre_fixed_normalizer);
  // Output per-instance loss
  if (top.size() >= 2) {
#ifdef _OPENMP
#pragma omp parallel for collapse(2)
#endif
    for (int i = 0; i < outer_num_; ++i) {
      for (int j = 0; j < inner_num_; j++) {
        Dtype sum = 0;
        for (int c = 0; c < bottom[0]->channels(); ++c) {
          sum += errors_.cpu_data()[(i * bottom[0]->channels() + c)
              * inner_num_ + j];
        }
        top[1]->mutable_cpu_data()[i * inner_num_ + j] = sum;
      }
    }
  }
}
void AbsdistLossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  int count = bottom[0]->count();
  caffe_sub(count, bottom[0]->cpu_data(), bottom[1]->cpu_data(),
      diff_.mutable_cpu_data());
  Dtype abs;
  abs = caffe_cpu_asum(count, diff_.cpu_data());
  Dtype loss = abs / bottom[0]->num();
  top[0]->mutable_cpu_data()[0] = loss;
  for (int i = 0; i < count; i++) {
    diff_.mutable_cpu_data()[i] = diff_.cpu_data()[i] > 0.0 ? 1.0 : -1.0;
  }
}
void SoftmaxWithLossOHEMLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  // The forward pass computes the softmax prob values.
  softmax_layer_->Forward(softmax_bottom_vec_, softmax_top_vec_);
  const Dtype* prob_data = prob_.cpu_data();
  const Dtype* label = bottom[1]->cpu_data();
  int dim = prob_.count() / outer_num_;
  Dtype* loss_data = bottom[0]->mutable_cpu_diff();
  int count = 0;
  Dtype loss = 0;
  for (int i = 0; i < outer_num_; ++i) {
    for (int j = 0; j < inner_num_; j++) {
      const int label_value = static_cast<int>(label[i * inner_num_ + j]);
      if (has_ignore_label_ && label_value == ignore_label_) {
        continue;
      }
      DCHECK_GE(label_value, 0);
      DCHECK_LT(label_value, prob_.shape(softmax_axis_));
      // loss -= log(std::max(prob_data[i * dim + label_value * inner_num_ + j],
      //     Dtype(FLT_MIN)));
      loss_data[i * inner_num_ + j] = -log(std::max(
          prob_data[i * dim + label_value * inner_num_ + j], Dtype(FLT_MIN)));
      ++count;
    }
  }
  loss = caffe_cpu_asum(count, loss_data);
  top[0]->mutable_cpu_data()[0] =
      loss / this->get_normalizer(normalization_, count);
  if (top.size() == 2) {
    top[1]->ShareData(prob_);
  }
  if (top.size() >= 3) {
    // Output per-instance loss
    caffe_copy(top[2]->count(), loss_data, top[2]->mutable_cpu_data());
  }
  // Fix a bug, which happens when propagate_down[0] = false in backward
  caffe_set(bottom[0]->count(), Dtype(0), bottom[0]->mutable_cpu_diff());
}
void AbsLossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  int count = bottom[0]->count();
  // compute the difference and store it for backprop
  caffe_sub(count, bottom[0]->cpu_data(), bottom[1]->cpu_data(),
      diff_.mutable_cpu_data());
  Dtype loss = caffe_cpu_asum(count, diff_.cpu_data()) / bottom[0]->num();
  // Dtype dot = caffe_cpu_dot(count, diff_.cpu_data(), diff_.cpu_data());
  // Dtype loss = dot / bottom[0]->num() / Dtype(2);
  top[0]->mutable_cpu_data()[0] = loss;
}
Dtype HingeLossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    vector<Blob<Dtype>*>* top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
  const Dtype* label = bottom[1]->cpu_data();
  int num = bottom[0]->num();
  int count = bottom[0]->count();
  int dim = count / num;
  caffe_copy(count, bottom_data, bottom_diff);
  for (int i = 0; i < num; ++i) {
    bottom_diff[i * dim + static_cast<int>(label[i])] *= -1;
  }
  for (int i = 0; i < num; ++i) {
    for (int j = 0; j < dim; ++j) {
      bottom_diff[i * dim + j] =
          std::max(Dtype(0), 1 + bottom_diff[i * dim + j]);
    }
  }
  return caffe_cpu_asum(count, bottom_diff) / num;
}
Dtype Tensor<Dtype>::asum() const {
  if (!mem_) { return 0; }
  switch (mem_->head()) {
  case SyncedMemory::HEAD_AT_CPU:
    return caffe_cpu_asum(count_, cpu_mem());
  case SyncedMemory::HEAD_AT_GPU:
  case SyncedMemory::SYNCED:
#ifndef CPU_ONLY
  {
    Dtype asum;
    caffe_gpu_asum(count_, gpu_mem(), &asum);
    return asum;
  }
#else
    NO_GPU;
#endif
  case SyncedMemory::UNINITIALIZED:
    return 0;
  default:
    LOG(FATAL) << "Unknown SyncedMemory head state: " << mem_->head();
  }
  return 0;
}
void ContrastiveLossLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>* top) {
  int count = bottom[0]->count();
  caffe_sub(
      count,
      bottom[0]->cpu_data(),  // a
      bottom[1]->cpu_data(),  // b
      diff_.mutable_cpu_data());  // a_i - b_i
  const int channels = bottom[0]->channels();
  /*
   * margin refers to the maximum value of energy -- parameter Q in the paper
   */
  printf("CLL : the values of a_i are \n");
  for (int i = 0; i < bottom[0]->num(); ++i) {
    for (int j = 0; j < channels; ++j) {
      printf("%f \t ", (float) bottom[0]->cpu_data()[i * channels + j]);
    }
  }
  printf("CLL : End printing values of a_i\n");
  printf("CLL : the values of b_i are \n");
  for (int i = 0; i < bottom[1]->num(); ++i) {
    for (int j = 0; j < channels; ++j) {
      printf("%f \t ", (float) bottom[1]->cpu_data()[i * channels + j]);
    }
  }
  printf("CLL : End printing values of b_i\n");
  printf("CLL : the diff values for the input vector are \n");
  for (int temp = 0; temp < count; temp++) {
    printf("%f \t ", (float) diff_.mutable_cpu_data()[temp]);
  }
  printf("CLL : End printing the diff values\n");
  Dtype margin = this->layer_param_.contrastive_loss_param().margin();
  // margin = Dtype(1000);
  Dtype loss(0.0);
  for (int i = 0; i < bottom[0]->num(); ++i) {
    dist_sq_.mutable_cpu_data()[i] =
        caffe_cpu_asum(channels, diff_.cpu_data() + (i * channels));
    printf("CLL : values of L1 norm are , %f \n",
        (float) dist_sq_.mutable_cpu_data()[i]);
    /*
     * 1 is similar pair, 0 is impostor pair.
     * The paper follows opposite notation
     */
    printf("CLL: label : %d \n", static_cast<int>(bottom[2]->cpu_data()[i]));
    if (static_cast<int>(bottom[2]->cpu_data()[i])) {  // similar pairs
      loss += Dtype(2) / margin * dist_sq_.cpu_data()[i]
          * dist_sq_.cpu_data()[i];
      printf(" CLL: loss computed : %f\n", dist_sq_.cpu_data()[i]);
    } else {  // dissimilar pairs
      // loss += std::max(margin - dist_sq_.cpu_data()[i], Dtype(0.0));
      printf("CLL : the exponent of 1 is : %f \n", exp(Dtype(1)));
      printf("CLL : the exponent of -1 is : %f \n", exp(Dtype(-1)));
      loss += Dtype(2) * margin *
          exp(-Dtype(2.77) / margin * dist_sq_.cpu_data()[i]);
      printf(" CLL: loss computed : %f\n", dist_sq_.cpu_data()[i]);
    }
    printf("CLL: value of label : %d \n",
        static_cast<int>(bottom[2]->cpu_data()[i]));
    printf("CLL: value of margin : %f \n", (float) margin);
  }
  loss = loss / static_cast<Dtype>(bottom[0]->num());
  printf("CLL: value of loss : %f \n", (float) loss);
  (*top)[0]->mutable_cpu_data()[0] = loss;
}
void caffe_cpu_zero_mean(const int N, Dtype* Y) {
  // Note: caffe_cpu_asum returns the sum of absolute values, so this
  // centering is only exact when all entries of Y are non-negative.
  Dtype mn;
  mn = caffe_cpu_asum(N, Y);
  mn = -(mn / N);
  caffe_add_scalar(N, mn, Y);
}
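// A minimal sketch, not from the original source: if Y may contain negative
// values, the mean has to come from the plain (signed) sum rather than the
// absolute sum returned by caffe_cpu_asum. The helper name is hypothetical;
// it assumes only caffe_add_scalar from the snippet above.
template <typename Dtype>
void caffe_cpu_zero_mean_signed(const int N, Dtype* Y) {
  Dtype sum = 0;
  for (int i = 0; i < N; ++i) {
    sum += Y[i];  // signed sum, unlike asum
  }
  caffe_add_scalar(N, -(sum / N), Y);  // subtract the true mean
}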