// Reduces bottom[0] slice by slice: each of the num_ consecutive runs of dim_
// elements is collapsed to one value in top[0] according to op_.
//   SUM / MEAN : dot product of the slice with sum_multiplier_
//                (presumably a vector of ones -- confirm where it is filled)
//   ASUM       : caffe_cpu_asum of the slice
//   SUMSQ      : dot product of the slice with itself
// MEAN additionally divides every output by dim_, and any coeff_ other than 1
// is applied as a final scale over all num_ outputs.
void ReductionLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  const Dtype* slice = bottom[0]->cpu_data();
  // The multiplier is only fetched when the blob actually holds elements.
  const Dtype* multiplier =
      (sum_multiplier_.count() > 0) ? sum_multiplier_.cpu_data() : NULL;
  Dtype* out = top[0]->mutable_cpu_data();

  for (int idx = 0; idx < num_; ++idx, slice += dim_) {
    switch (op_) {
    case ReductionParameter_ReductionOp_SUM:
    case ReductionParameter_ReductionOp_MEAN:
      out[idx] = caffe_cpu_dot(dim_, multiplier, slice);
      break;
    case ReductionParameter_ReductionOp_ASUM:
      out[idx] = caffe_cpu_asum(dim_, slice);
      break;
    case ReductionParameter_ReductionOp_SUMSQ:
      out[idx] = caffe_cpu_dot(dim_, slice, slice);
      break;
    default:
      LOG(FATAL) << "Unknown reduction op: "
          << ReductionParameter_ReductionOp_Name(op_);
    }
  }

  if (op_ == ReductionParameter_ReductionOp_MEAN) {
    // Turn the per-slice sums into per-slice means.
    caffe_scal(num_, Dtype(1) / dim_, top[0]->mutable_cpu_data());
  }
  if (coeff_ != Dtype(1)) {
    caffe_scal(num_, coeff_, top[0]->mutable_cpu_data());
  }
}
// Fills `blob` with samples drawn by caffe_rng_gaussian (mean 0, std 1) and
// then rescales each of the blob->num() rows, of length count()/num(), by
// 1 / sqrt(sum of squares + 1e-12). The small constant keeps the divisor
// non-zero. Sparsity is not supported: the filler parameter's sparse() value
// must be -1, which is checked after the data has been written.
virtual void Fill(Blob<Dtype>* blob) {
  CHECK(blob->count());
  const int row_len = blob->count() / blob->num();
  caffe_rng_gaussian<Dtype>(blob->count(), Dtype(0), 1,
      blob->mutable_cpu_data());
  for (int row = 0; row < blob->num(); ++row) {
    const int offset = row * row_len;
    const Dtype* values = blob->cpu_data() + offset;
    const Dtype norm_sq = caffe_cpu_dot(row_len, values, values) + 1e-12;
    caffe_cpu_scale<Dtype>(row_len, Dtype(1.0) / sqrt(norm_sq),
        blob->cpu_data() + offset, blob->mutable_cpu_data() + offset);
  }
  CHECK_EQ(this->filler_param_.sparse(), -1)
      << "Sparsity not supported by this Filler.";
}
// Backward pass. With y = top data, dy = top diff and s = norm_ entry of a
// row, each of the top[0]->num() rows receives
//     bottom_diff = s * dy - (y . dy) * s * y
// (assuming caffe_cpu_axpby computes Y = alpha * X + beta * Y).
// The top diff is first copied into the bottom diff so that axpby can update
// it in place. propagate_down is not consulted.
void L2NormLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  const Dtype* out_grad = top[0]->cpu_diff();
  const Dtype* out_val = top[0]->cpu_data();
  const Dtype* row_scale = norm_.cpu_data();
  Dtype* in_grad = bottom[0]->mutable_cpu_diff();
  const int rows = top[0]->num();
  const int row_len = top[0]->count() / rows;

  caffe_copy(bottom[0]->count(), out_grad, in_grad);
  for (int r = 0; r < rows; ++r) {
    const int offset = r * row_len;
    const Dtype projection =
        caffe_cpu_dot(row_len, out_val + offset, out_grad + offset);
    caffe_cpu_axpby(row_len, Dtype(-1) * projection * row_scale[r],
        out_val + offset, row_scale[r], in_grad + offset);
  }
}
// Forward pass: stores bottom[0] - bottom[1] in diff_, multiplies it
// element-wise by bottom[2] when exactly three bottoms are supplied, and
// writes  sum(diff_^2) / num / 2  to top[0][0].
void EuclideanLossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const int n_elem = bottom[0]->count();
  caffe_sub(n_elem, bottom[0]->cpu_data(), bottom[1]->cpu_data(),
      diff_.mutable_cpu_data());

  const bool has_weights = (bottom.size() == 3);
  if (has_weights) {
    // In-place element-wise weighting of the residual.
    caffe_mul(n_elem, bottom[2]->cpu_data(), diff_.cpu_data(),
        diff_.mutable_cpu_data());
  }

  const Dtype* residual = diff_.cpu_data();
  const Dtype sq_norm = caffe_cpu_dot(n_elem, residual, residual);
  top[0]->mutable_cpu_data()[0] = sq_norm / bottom[0]->num() / Dtype(2);
}
// Returns the sum of squares of the diff buffer.
// Yields 0 when the diff memory has not been allocated or is UNINITIALIZED.
// Only the HEAD_AT_CPU state is handled here; any other head state is fatal.
Dtype Blob<Dtype>::sumsq_diff() const {
  if (!diff_) { return 0; }
  // Initialised so that no path can return an indeterminate value.
  Dtype sumsq = 0;
  switch (diff_->head()) {
  case SyncedMemory::HEAD_AT_CPU: {
    const Dtype* diff = cpu_diff();
    sumsq = caffe_cpu_dot(count_, diff, diff);
    break;
  }
  case SyncedMemory::UNINITIALIZED:
    return 0;
  default:
    // Report the state of the buffer that was actually inspected (diff_).
    // Printing data_->head() would be misleading and would dereference
    // data_, which this function never checks for null.
    LOG(FATAL) << "Unknown SyncedMemory head state: " << diff_->head();
  }
  return sumsq;
}
// Forward pass of the bounding-box regression loss.
//   bottom[0]: predictions, bottom[1]: ground-truth boxes,
//   bottom[2]: bipartite match (an entry of -1 means "no match").
// For every box i of every sample n, the COORDS_PER_BOX residuals
// (prediction - ground truth) are stored in diff_; unmatched boxes get zero
// residuals. The loss written to top[0][0] is  sum(diff_^2) / num / 2.
//
// Every residual is stored at [n*dim + i*COORDS_PER_BOX + x], the same slot
// the unmatched branch zeroes. Writing all coordinates of a box to
// [n*dim + i] would overwrite unrelated slots and leave the real ones stale.
void MultiboxBboxLossLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  const int num = bottom[0]->num();
  const int count = bottom[0]->count();
  const int dim = count / num;
  const int num_predicted_boxes = dim / COORDS_PER_BOX;
  const Dtype* prediction = bottom[0]->cpu_data();
  const Dtype* groundtruth_bboxes = bottom[1]->cpu_data();
  const Dtype* bipartite_match = bottom[2]->cpu_data();
  Dtype* diff = diff_.mutable_cpu_data();

  for (int n = 0; n < num; n++) {
    if (n < 1) {
      // Debug output for the first sample only.
      // NOTE(review): reads four match entries unconditionally -- confirm
      // bottom[2] always holds at least four values.
      LOG(INFO) << "MBOX_BBOX_LOSS: bipartite_match={" << bipartite_match[0]<< "," << bipartite_match[1]<< "," << bipartite_match[2]<< "," << bipartite_match[3]<< "}";
    }
    for (int i = 0; i < num_predicted_boxes; i++) {
      // NOTE(review): the match is indexed by box only, not by sample n --
      // confirm bottom[2] is shared across the batch.
      const int matching_groundtruth_box = static_cast<int>(bipartite_match[i]);
      const int box_base = n*dim + i*COORDS_PER_BOX;
      if (matching_groundtruth_box == -1) {
        // No match: contribute nothing to the loss.
        for (int x = 0; x < COORDS_PER_BOX; x++) {
          diff[box_base + x] = 0;
        }
      } else {
        for (int x = 0; x < COORDS_PER_BOX; x++) {
          const Dtype gt_coord = groundtruth_bboxes[box_base + x];
          const Dtype net_coord =
              prediction[n*dim + matching_groundtruth_box*COORDS_PER_BOX + x];
          diff[box_base + x] = net_coord - gt_coord;
          if (n < 1) {
            LOG(INFO) << "MBOX_BBOX_LOSS: loss[n="<<n<<"][i="<<i<<"]="<<diff[box_base + x];
          }
        }
      }
    }
  }

  const Dtype dot = caffe_cpu_dot(count, diff_.cpu_data(), diff_.cpu_data());
  const Dtype loss = dot / bottom[0]->num() / Dtype(2);
  top[0]->mutable_cpu_data()[0] = loss;
  LOG(INFO) << "MBOX_BBOX_LOSS: loss=" << loss;
}
// Forward pass: root-mean-square error over the first num_rating_ elements.
// num_rating_ is read from bottom[2][0] and must not exceed bottom[0]'s
// element count. diff_ receives bottom[0] - bottom[1] (plus bias_ when it is
// non-zero) for those elements, and top[0][0] is set to
//     sqrt(sum(diff_^2) / num_rating_).
void RmseLossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    vector<Blob<Dtype>*>* top) {
  num_rating_ = bottom[2]->cpu_data()[0];
  const int capacity = bottom[0]->count();
  CHECK_LE(num_rating_, capacity) << "assigned rating length exceed boundary.";

  caffe_sub(num_rating_, bottom[0]->cpu_data(), bottom[1]->cpu_data(),
      diff_.mutable_cpu_data());
  if (bias_ != 0) {
    caffe_add_scalar(num_rating_, bias_, diff_.mutable_cpu_data());
  }

  const Dtype* residual = diff_.cpu_data();
  const Dtype sq_sum = caffe_cpu_dot(num_rating_, residual, residual);
  // RMSE rather than the halved mean squared error of the Euclidean loss.
  const Dtype rmse = sqrt(sq_sum / num_rating_);
  (*top)[0]->mutable_cpu_data()[0] = rmse;
}
// Forward pass: for each of the bottom[0]->num() rows, writes the negated
// squared Euclidean distance between the matching rows of bottom[0] and
// bottom[1] to top[0]. The element-wise difference is kept in diff_.
void EuclideanSimilarityLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  const int rows = bottom[0]->num();
  const int total = bottom[0]->count();
  const int row_len = total / rows;

  caffe_sub(total, bottom[0]->cpu_data(), bottom[1]->cpu_data(),
      diff_.mutable_cpu_data());

  Dtype* similarity = top[0]->mutable_cpu_data();
  const Dtype* residual = diff_.cpu_data();
  for (int r = 0; r < rows; ++r) {
    const Dtype* row = residual + r * row_len;
    similarity[r] = caffe_cpu_dot(row_len, row, row);
  }
  // Larger distance => smaller similarity.
  caffe_cpu_scale(rows, Dtype(-1.0), similarity, similarity);
}
// Runs the forward pass for the current Caffe mode and returns the layer's
// total loss: for every top blob whose loss(top_id) is non-zero, the dot
// product of the blob's data with its diff (which holds the loss weights).
// The whole call is bracketed by Lock()/Unlock(). When built with USE_MLSL
// and Bypass() reports true, the pass is skipped and 0 is returned.
inline Dtype Layer<Dtype>::Forward(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  // Serialise forward passes.
  Lock();
  Dtype total_loss = 0;
  Reshape(bottom, top);
#ifdef USE_MLSL
  if (Bypass(bottom, top)) {
    Unlock();
    return total_loss;
  }
#endif
  switch (Caffe::mode()) {
  case Caffe::CPU:
    Forward_cpu(bottom, top);
    for (int t = 0; t < top.size(); ++t) {
      if (this->loss(t)) {
        const int n_elem = top[t]->count();
        const Dtype* values = top[t]->cpu_data();
        const Dtype* weights = top[t]->cpu_diff();
        total_loss += caffe_cpu_dot(n_elem, values, weights);
      }
    }
    break;
  case Caffe::GPU:
    Forward_gpu(bottom, top);
#ifndef CPU_ONLY
    for (int t = 0; t < top.size(); ++t) {
      if (this->loss(t)) {
        const int n_elem = top[t]->count();
        const Dtype* values = top[t]->gpu_data();
        const Dtype* weights = top[t]->gpu_diff();
        Dtype partial = 0;
        caffe_gpu_dot(n_elem, values, weights, &partial);
        total_loss += partial;
      }
    }
#endif
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode.";
  }
  Unlock();
  return total_loss;
}
// Forward pass (variant that also returns the loss).
// Stores bottom[0] - bottom[1] in diff_ and computes
//     loss = sum(diff_^2) / num / 2.
// The loss is written to (*top)[0][0] only when exactly one top blob is
// present; it is returned in every case.
Dtype EuclideanLossLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>* top) {
  const int count = bottom[0]->count();
  caffe_sub(count, bottom[0]->cpu_data(), bottom[1]->cpu_data(),
      diff_.mutable_cpu_data());
  const Dtype dot = caffe_cpu_dot(count, diff_.cpu_data(), diff_.cpu_data());
  const Dtype loss = dot / bottom[0]->num() / Dtype(2);
  if (top->size() == 1) {
    (*top)[0]->mutable_cpu_data()[0] = loss;
  }
  return loss;
}
// Forward pass of the log-space depth loss.
// With d_i = log(bottom[0]_i) - log(bottom[1]_i) and n = bottom[0]->num():
//     loss = sum(d_i^2) / n - gamma * (sum(d_i))^2 / n^2
// d is stored in diff_. `gamma` is not declared in this function; it is
// resolved from the enclosing scope.
//
// The log-difference is written straight into diff_, so no scratch buffers
// are needed. Allocating them with `new Dtype(count)` would create a single
// scalar rather than an array, so indexing it would overrun the heap. Both
// inputs are read here; taking both logs from bottom[0] would make the
// difference identically zero.
void DepthLossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const int count = bottom[0]->count();
  const Dtype* in0 = bottom[0]->cpu_data();
  const Dtype* in1 = bottom[1]->cpu_data();
  Dtype* diff_data = diff_.mutable_cpu_data();

  // d_i = log(Y_i) - log(Y*_i), accumulated for the second loss term.
  Dtype log_sum = Dtype(0);
  for (int i = 0; i < count; i++) {
    diff_data[i] = log(in0[i]) - log(in1[i]);
    log_sum += diff_data[i];
  }

  // First loss term: sum of squared log-differences.
  const Dtype dot = caffe_cpu_dot(count, diff_.cpu_data(), diff_.cpu_data());

  const int num = bottom[0]->num();
  const Dtype loss = dot / num - gamma * log_sum * log_sum / num / num;
  top[0]->mutable_cpu_data()[0] = loss;
}
Dtype Blob<Dtype>::sumsq_diff() const { Dtype sumsq; const Dtype* diff; if (!diff_) { return 0; } switch (diff_->head()) { case SyncedMemory::HEAD_AT_CPU: { diff = cpu_diff(); sumsq = caffe_cpu_dot(count_, diff, diff); break; } case SyncedMemory::HEAD_AT_GPU: case SyncedMemory::SYNCED: { #ifndef CPU_ONLY diff = gpu_diff(); if (device_->backend() == Backend::BACKEND_CUDA) { #ifdef USE_CUDA caffe_gpu_dot(count_, diff, diff, &sumsq); #endif } else { #ifdef USE_GREENTEA greentea_gpu_dot(device_->id(), count_, (cl_mem) diff, 0, (cl_mem) diff, 0, &sumsq); #endif } #else NO_GPU; #endif break; } case SyncedMemory::UNINITIALIZED: return 0; default: LOG(FATAL)<< "Unknown SyncedMemory head state: " << data_->head(); } return sumsq; }
// Forward pass of the contrastive loss.
// diff_ holds bottom[0] - bottom[1]; dist_sq_[i] holds the squared distance
// of pair i, taken over bottom[0]->channels() elements per pair.
// bottom[2][i], truncated to int, selects the term added for pair i:
//   non-zero (similar)  : dist_sq
//   zero (dissimilar)   : max(margin - dist_sq, 0)
// The summed terms are divided by num and by 2 and stored in top[0][0].
void ContrastiveLossLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  const int total = bottom[0]->count();
  caffe_sub(total,
      bottom[0]->cpu_data(),
      bottom[1]->cpu_data(),
      diff_.mutable_cpu_data());

  const int channels = bottom[0]->channels();
  const Dtype margin = this->layer_param_.contrastive_loss_param().margin();
  const int pairs = bottom[0]->num();

  Dtype accumulated(0.0);
  for (int p = 0; p < pairs; ++p) {
    const Dtype* pair_diff = diff_.cpu_data() + (p * channels);
    const Dtype dist_sq = caffe_cpu_dot(channels, pair_diff, pair_diff);
    dist_sq_.mutable_cpu_data()[p] = dist_sq;

    const bool similar = static_cast<int>(bottom[2]->cpu_data()[p]) != 0;
    if (similar) {
      accumulated += dist_sq;
    } else {
      accumulated += std::max(margin - dist_sq, Dtype(0.0));
    }
  }

  top[0]->mutable_cpu_data()[0] =
      accumulated / static_cast<Dtype>(pairs) / Dtype(2);
}
// Forward pass of the multi-label squared hinge loss.
// bottom[0]'s diff buffer is used as scratch space: for each element it ends
// up holding max(0, 1 + s), where s is the input score, negated when the
// matching label (truncated to int) equals 1. The loss stored in top[0][0]
// is the sum of squares of those values divided by num.
void MultiHingeLossLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  const Dtype* scores = bottom[0]->cpu_data();
  Dtype* margins = bottom[0]->mutable_cpu_diff();
  const Dtype* label = bottom[1]->cpu_data();
  const int num = bottom[0]->num();
  const int count = bottom[0]->count();
  const int dim = count / num;

  caffe_copy(count, scores, margins);

  // The sign flip and the hinge are both element-wise, so one pass over the
  // num * dim entries suffices.
  const int visited = num * dim;
  for (int k = 0; k < visited; ++k) {
    if (static_cast<int>(label[k]) == 1) {
      margins[k] *= -1;
    }
    margins[k] = std::max(Dtype(0), 1 + margins[k]);
  }

  Dtype* loss = top[0]->mutable_cpu_data();
  loss[0] = caffe_cpu_dot(count, margins, margins) / num;
}
// Backward pass of the Euclidean loss.
// For each of the first two bottoms with propagate_down set:
//     bottom_diff = (+/-) top_diff[0] / num * inv_norm * diff_
// with sign +1 for bottom[0] and -1 for bottom[1]. When is_normalize_ is
// set, inv_norm = 1 / sum(bottom[nc_]^2), provided that sum is positive;
// otherwise inv_norm = 1.
//
// The normalisation is folded into alpha and diff_ is left untouched.
// Rescaling diff_ in place inside the per-bottom loop would apply 1/nrmlz
// twice to the second bottom whenever both gradients are requested, and
// again on every repeated Backward call.
void EuclideanLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  Dtype inv_norm = Dtype(1);
  if (is_normalize_ && (propagate_down[0] || propagate_down[1])) {
    const int count = bottom[0]->count();
    const Dtype* reference = bottom[nc_]->cpu_data();
    const Dtype nrmlz = caffe_cpu_dot(count, reference, reference);
    if (nrmlz > 0) {
      inv_norm = Dtype(1) / nrmlz;
    }
  }

  for (int i = 0; i < 2; ++i) {
    if (!propagate_down[i]) { continue; }
    const Dtype sign = (i == 0) ? 1 : -1;
    const Dtype alpha =
        sign * top[0]->cpu_diff()[0] / bottom[i]->num() * inv_norm;
    caffe_cpu_axpby(
        bottom[i]->count(),              // count
        alpha,                           // alpha
        diff_.cpu_data(),                // a
        Dtype(0),                        // beta
        bottom[i]->mutable_cpu_diff());  // b
  }
}
// Backward pass of the scale layer.
//  1. If a bias layer is attached and the last parameter blob wants a
//     gradient, delegate to bias_layer_->Backward.
//  2. If the scale factor needs a gradient (either it is a learned parameter
//     with param_propagate_down_[0] set, or it is bottom[1] with
//     propagate_down[1] set), compute it by multiplying top diff with the
//     bottom data element-wise and summing over the inner and outer
//     dimensions.
//  3. If propagate_down[0] is set, write bottom diff = top diff * scale,
//     block by block of inner_dim_ elements.
void ScaleLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (bias_layer_ &&
      this->param_propagate_down_[this->param_propagate_down_.size() - 1]) {
    bias_layer_->Backward(top, bias_propagate_down_, bias_bottom_vec_);
  }
  // With a single bottom the scale is a learned parameter (blobs_[0]);
  // otherwise it is supplied as bottom[1].
  const bool scale_param = (bottom.size() == 1);
  Blob<Dtype>* scale = scale_param ? this->blobs_[0].get() : bottom[1];
  if ((!scale_param && propagate_down[1]) ||
      (scale_param && this->param_propagate_down_[0])) {
    const Dtype* top_diff = top[0]->cpu_diff();
    const bool in_place = (bottom[0] == top[0]);
    // When computing in place, the bottom data is read from temp_ instead of
    // bottom[0] (presumably saved there by the forward pass -- confirm).
    const Dtype* bottom_data = (in_place ? &temp_ : bottom[0])->cpu_data();
    // Hack: store big eltwise product in bottom[0] diff, except in the special
    // case where this layer itself does the eltwise product, in which case we
    // can store it directly in the scale diff, and we're done.
    // If we're computing in-place (and not doing eltwise computation), this
    // hack doesn't work and we store the product in temp_.
    const bool is_eltwise = (bottom[0]->count() == scale->count());
    Dtype* product = (is_eltwise ? scale->mutable_cpu_diff() :
        (in_place ? temp_.mutable_cpu_data() : bottom[0]->mutable_cpu_diff()));
    caffe_mul(top[0]->count(), top_diff, bottom_data, product);
    if (!is_eltwise) {
      // First reduction: sum the product over the inner dimension.
      Dtype* sum_result = NULL;
      if (inner_dim_ == 1) {
        // Nothing to sum over; the product already is the per-row result.
        sum_result = product;
      } else if (sum_result_.count() == 1) {
        // Single output value: one dot product with sum_multiplier_ gives the
        // final gradient. It is accumulated (+=) for a learned parameter and
        // overwritten (=) when the scale is a bottom blob.
        const Dtype* sum_mult = sum_multiplier_.cpu_data();
        Dtype* scale_diff = scale->mutable_cpu_diff();
        if (scale_param) {
          Dtype result = caffe_cpu_dot(inner_dim_, product, sum_mult);
          *scale_diff += result;
        } else {
          *scale_diff = caffe_cpu_dot(inner_dim_, product, sum_mult);
        }
      } else {
        // General case: matrix-vector product with sum_multiplier_. The
        // result goes straight into the scale diff when there is no outer
        // dimension left to reduce, otherwise into the sum_result_ scratch
        // blob.
        const Dtype* sum_mult = sum_multiplier_.cpu_data();
        sum_result = (outer_dim_ == 1) ?
            scale->mutable_cpu_diff() : sum_result_.mutable_cpu_data();
        caffe_cpu_gemv(CblasNoTrans, sum_result_.count(), inner_dim_,
                       Dtype(1), product, sum_mult, Dtype(0), sum_result);
      }
      // Second reduction: sum over the outer dimension.
      if (outer_dim_ != 1) {
        const Dtype* sum_mult = sum_multiplier_.cpu_data();
        Dtype* scale_diff = scale->mutable_cpu_diff();
        if (scale_dim_ == 1) {
          if (scale_param) {
            Dtype result = caffe_cpu_dot(outer_dim_, sum_mult, sum_result);
            *scale_diff += result;
          } else {
            *scale_diff = caffe_cpu_dot(outer_dim_, sum_mult, sum_result);
          }
        } else {
          // Dtype(scale_param) is passed in the beta position: 1 for a
          // learned parameter, 0 for a bottom blob. Assuming the usual gemv
          // contract (y = alpha*A*x + beta*y), that accumulates into the
          // existing diff in the first case and overwrites it in the second.
          caffe_cpu_gemv(CblasTrans, outer_dim_, scale_dim_,
                         Dtype(1), sum_result, sum_mult, Dtype(scale_param),
                         scale_diff);
        }
      }
    }
  }
  if (propagate_down[0]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    const Dtype* scale_data = scale->cpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    // Walk outer_dim_ x scale_dim_ blocks of inner_dim_ elements; block d of
    // every outer slice is multiplied by scale_data[d].
    for (int n = 0; n < outer_dim_; ++n) {
      for (int d = 0; d < scale_dim_; ++d) {
        const Dtype factor = scale_data[d];
        caffe_cpu_scale(inner_dim_, factor, top_diff, bottom_diff);
        bottom_diff += inner_dim_;
        top_diff += inner_dim_;
      }
    }
  }
}
// Forward pass of the coupled-cluster loss.
// bottom[0] holds features (feat_len values per sample) and bottom[1] holds
// labels; samples are processed in group_num groups of N.
// For each group:
//   * the anchor label is the first label that occurs twice (it stays -1
//     when no label repeats);
//   * samples carrying the anchor label are positives, all others negatives;
//   * pos_center_ receives the mean feature of the positives;
//   * groups with no negatives or fewer than two positives are skipped;
//   * otherwise diff_ = (feature - centre) * scale and dist_sq_ = |diff_|^2
//     are stored per sample, hinge terms are evaluated against the closest
//     negative / farthest positive, pos_backward / neg_backward mark the
//     samples with a positive hinge, and the mean positive hinge of the
//     group is added to the loss.
// top[0][0] is the average over the groups that were not skipped.
//
// Both divisions are guarded because an unguarded zero divisor yields NaN:
//   * a group without positives is not averaged, so its centre stays zero;
//   * when every group is skipped the loss is 0.
// Negative-set membership is looked up in a per-group flag vector, which
// avoids a linear search of neg_ids[i] for every sample.
void CoupledClusterLossLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  pos_ids = std::vector<std::vector<int> >(group_num, std::vector<int>());
  neg_ids = std::vector<std::vector<int> >(group_num, std::vector<int>());
  pos_backward = std::vector<bool>(group_num*N, false);
  neg_backward = std::vector<bool>(group_num*N, false);
  const Dtype* feat_ptr = bottom[0]->cpu_data();
  const Dtype* label_ptr = bottom[1]->cpu_data();
  Dtype* diff_ptr_ = diff_.mutable_cpu_data();
  Dtype loss(0);
  caffe_set(feat_len*group_num, Dtype(0), pos_center_.mutable_cpu_data());
  int cnt = 0;  // number of groups that contributed to the loss

  /* i -> group index */
  for (int i = 0; i < group_num; ++i) {
    const int base = i * N;

    /* search for the positive id: the first label seen twice */
    std::set<Dtype> labels;
    Dtype anchor_id = -1;
    for (int j = 0; j < N; ++j) {
      const Dtype tmp = label_ptr[base + j];
      if (labels.count(tmp) > 0) {
        anchor_id = tmp;
        break;
      }
      labels.insert(tmp);
    }

    /* collect positive and negative ids, accumulate the positive centre */
    Dtype* center = pos_center_.mutable_cpu_data() + feat_len*i;
    std::vector<bool> is_negative(N, false);
    for (int j = 0; j < N; ++j) {
      if (label_ptr[base + j] == anchor_id) {
        pos_ids[i].push_back(j);
        caffe_add(feat_len, feat_ptr + feat_len*(base + j), center, center);
      } else {
        neg_ids[i].push_back(j);
        is_negative[j] = true;
      }
    }
    if (!pos_ids[i].empty()) {
      caffe_cpu_scale(feat_len, Dtype(1)/pos_ids[i].size(), center, center);
    }
    // A usable group needs at least one negative and two positives.
    if (neg_ids[i].size() == 0 || pos_ids[i].size() <= 1) continue;

    Dtype pos_mdist = Dtype(0);
    // -1 marks "not set yet"; squared distances are never negative.
    Dtype neg_min_val = -1;
    Dtype pos_max_val = -1;
    for (int j = 0; j < N; ++j) {
      // f[j]-center
      Dtype* sample_diff = diff_ptr_ + feat_len*(base + j);
      caffe_sub(feat_len, feat_ptr + feat_len*(base + j),
          pos_center_.cpu_data() + feat_len*i, sample_diff);
      if (scale != 1) {
        caffe_cpu_scale(feat_len, scale, sample_diff, sample_diff);
      }
      const Dtype d = caffe_cpu_dot(feat_len, sample_diff, sample_diff);
      if (log_flag) LOG(INFO) << "i " << i << ", j " << j << ", d " << d;
      dist_sq_.mutable_cpu_data()[base + j] = d;
      if (is_negative[j]) {
        if (neg_min_val == -1 || d < neg_min_val) neg_min_val = d;
      } else {
        if (pos_max_val == -1 || d > pos_max_val) pos_max_val = d;
      }
    }

    for (int j = 0; j < N; ++j) {
      const Dtype d = dist_sq_.cpu_data()[base + j];
      if (is_negative[j]) {
        const Dtype mdist = std::max(-d+margin+pos_max_val, Dtype(0));
        if (log_flag) LOG(INFO) << "j=" << j << ", d=" << d << ", pos_max_val=" << pos_max_val << ", mdist=" << mdist;
        if (mdist > 0) neg_backward[base + j] = true;
      } else {
        const Dtype mdist = std::max(d+margin-neg_min_val, Dtype(0));
        if (log_flag) LOG(INFO) << "j=" << j << ", d=" << d << ", neg_min_val=" << neg_min_val << ", mdist=" << mdist;
        if (mdist > 0) pos_backward[base + j] = true;
        pos_mdist += mdist;
      }
    }

    /* average punishment */
    pos_mdist /= pos_ids[i].size();
    if (log_flag) LOG(INFO) << "pos_mdist " << pos_mdist << ", neg_min_val " << neg_min_val;
    loss += pos_mdist;
    ++cnt;
  }

  top[0]->mutable_cpu_data()[0] = (cnt > 0) ? loss / cnt : Dtype(0);
}
// Forward pass of the sparse, optionally weighted, squared-error depth loss.
//  * diff_ = bottom[0] - bottom[1]; entries whose label equals MASK_VAL
//    (ground truth missing) are zeroed. Only the first height*width entries
//    of each sample are scanned for the mask.
//  * With fewer than three bottoms:  loss = sum(diff_^2) / num / 2.
//  * With three or more, bottom[2] supplies a per-pixel weight U (absolute
//    value taken; forced to 1 where the label is masked):
//        Udiff_   = U * diff_
//        UtUdiff_ = U * Udiff_
//        loss     = sum(diff_ * UtUdiff_) / num / 2
//                   - sum(log(U + EPS)) / num
//    NOTE(review): the weighted branch indexes with n*height*width + h*width
//    + w, which matches the blob layout only for single-channel data --
//    confirm against the layer's setup.
void SparseDepthMahalanobisLossLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  const int count = bottom[0]->count();
  const int num = bottom[0]->num();
  const int height = bottom[0]->height();
  const int width = bottom[0]->width();
  const int plane = height * width;
  Dtype* residual = diff_.mutable_cpu_data();
  const Dtype* label = bottom[1]->cpu_data();

  // Element-wise difference a_i - b_i.
  caffe_sub(count,
      bottom[0]->cpu_data(),
      bottom[1]->cpu_data(),
      diff_.mutable_cpu_data());

  // Drop the residual wherever the ground truth is missing.
  for (int n = 0; n < num; ++n) {
    const int sample_base = bottom[1]->offset(n);
    for (int i = 0; i < plane; ++i) {
      if (label[sample_base + i] == Dtype(MASK_VAL)) {
        residual[sample_base + i] = Dtype(0);
      }
    }
  }

  if (bottom.size() < 3) {
    // Unweighted distance.
    const Dtype dot = caffe_cpu_dot(count, diff_.cpu_data(), diff_.cpu_data());
    const Dtype loss = dot / num / Dtype(2);
    top[0]->mutable_cpu_data()[0] = loss;
    return;
  }

  // Weighted distance.
  Dtype reg(0);
  Dtype U;
  for (int n = 0; n < num; ++n) {
    for (int h = 0; h < height; ++h) {
      for (int w = 0; w < width; ++w) {
        const int idx = n*plane + h*width + w;
        const Dtype mask = label[bottom[1]->offset(n, 0, h, w)];
        if (mask == Dtype(MASK_VAL)) {
          U = Dtype(1.0);
        } else {
          U = fabs(bottom[2]->cpu_data()[idx]);
        }
        // Udiff
        Udiff_.mutable_cpu_data()[idx] = U * diff_.cpu_data()[idx];
        // UtUdiff
        UtUdiff_.mutable_cpu_data()[idx] = U * Udiff_.cpu_data()[idx];
        // Regulariser term.
        reg += log(U + Dtype(EPS));
      }
    }
  }

  // difftUtUdiff
  const Dtype dot = caffe_cpu_dot(count, diff_.cpu_data(), UtUdiff_.cpu_data());
  const Dtype loss = dot / num / Dtype(2);
  top[0]->mutable_cpu_data()[0] = loss - reg / num;
}