template <typename Dtype>
void DotProductSimilarityLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  int count = bottom[0]->count();
  int num = bottom[0]->num();
  int dim = count / num;
  const Dtype* top_diff = top[0]->cpu_diff();
  for (int i = 0; i < num; ++i) {
    // For s = a . b, ds/db = a and ds/da = b, each scaled by the incoming
    // gradient for sample i.
    caffe_cpu_scale(dim, top_diff[i],
        bottom[0]->cpu_data() + bottom[0]->offset(i),
        bottom[1]->mutable_cpu_diff() + bottom[1]->offset(i));
    caffe_cpu_scale(dim, top_diff[i],
        bottom[1]->cpu_data() + bottom[1]->offset(i),
        bottom[0]->mutable_cpu_diff() + bottom[0]->offset(i));
  }
}
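// Every snippet in this section leans on caffe_cpu_scale(n, alpha, x, y),
// which writes y = alpha * x element-wise. Below is a minimal reference
// sketch of those semantics, not the actual Caffe implementation (which
// dispatches to BLAS copy + scal); caffe_cpu_scale_sketch is a hypothetical
// name used only here.
template <typename Dtype>
void caffe_cpu_scale_sketch(const int n, const Dtype alpha, const Dtype* x,
    Dtype* y) {
  for (int i = 0; i < n; ++i) {
    y[i] = alpha * x[i];  // also safe in-place (x == y), as several
                          // snippets below rely on
  }
}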
template <typename Dtype>
void ScaleLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  if (bottom[0] == top[0]) {
    // In-place computation; need to store bottom data before overwriting it.
    // Note that this is only necessary for Backward; we could skip this if not
    // doing Backward, but Caffe currently provides no way of knowing whether
    // we'll need to do Backward at the time of the Forward call.
    caffe_copy(bottom[0]->count(), bottom[0]->cpu_data(),
        temp_.mutable_cpu_data());
  }
  const Dtype* scale_data =
      ((bottom.size() > 1) ? bottom[1] : this->blobs_[0].get())->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  for (int n = 0; n < outer_dim_; ++n) {
    for (int d = 0; d < scale_dim_; ++d) {
      const Dtype factor = scale_data[d];
      caffe_cpu_scale(inner_dim_, factor, bottom_data, top_data);
      bottom_data += inner_dim_;
      top_data += inner_dim_;
    }
  }
  if (bias_layer_) {
    bias_layer_->Forward(bias_bottom_vec_, top);
  }
}
template <typename Dtype>
void NormalizeLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  const Dtype* top_diff = top[0]->cpu_diff();
  const Dtype* top_data = top[0]->cpu_data();
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
  int n = top[0]->num();
  int d = top[0]->count() / n;
  for (int i = 0; i < n; ++i) {
    Dtype a = caffe_cpu_dot(d, top_data + i * d, top_diff + i * d);
    caffe_cpu_scale(d, a, top_data + i * d, bottom_diff + i * d);
    caffe_sub(d, top_diff + i * d, bottom_diff + i * d, bottom_diff + i * d);
    a = caffe_cpu_dot(d, bottom_data + i * d, bottom_data + i * d);
    caffe_cpu_scale(d, Dtype(pow(a, -0.5)), bottom_diff + i * d,
        bottom_diff + i * d);
  }
}
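// For reference, the loop above applies the Jacobian of y = x / ||x||_2
// row by row:
//   dE/dx = (dE/dy - (y . dE/dy) * y) / ||x||_2.
// The dot + scale + sub calls project out the component of the top gradient
// along y; the final dot + pow(-0.5) + scale rescales by the inverse input
// norm.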
// Note: this snippet uses an earlier Backward_cpu signature (a single bool
// propagate_down, with bottom passed by pointer) rather than the vector-based
// interface used by the variants below.
template <typename Dtype>
void EltwiseLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
  if (propagate_down) {
    const int count = top[0]->count();
    const Dtype* top_data = top[0]->cpu_data();
    const Dtype* top_diff = top[0]->cpu_diff();
    for (int i = 0; i < bottom->size(); ++i) {
      const Dtype* bottom_data = (*bottom)[i]->cpu_data();
      Dtype* bottom_diff = (*bottom)[i]->mutable_cpu_diff();
      switch (op_) {
      case EltwiseParameter_EltwiseOp_PROD:
        caffe_div(count, top_data, bottom_data, bottom_diff);
        caffe_mul(count, bottom_diff, top_diff, bottom_diff);
        break;
      case EltwiseParameter_EltwiseOp_SUM:
        if (coeffs_[i] == Dtype(1)) {
          caffe_copy(count, top_diff, bottom_diff);
        } else {
          caffe_cpu_scale(count, coeffs_[i], top_diff, bottom_diff);
        }
        break;
      default:
        LOG(FATAL) << "Unknown elementwise operation.";
      }
    }
  }
}
template <typename Dtype>
void EuclideanSimilarityLayer<Dtype>::Backward_cpu(
    const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  int num = bottom[0]->num();
  int count = bottom[0]->count();
  int dim = count / num;
  const Dtype* pd = diff_.cpu_data();
  const Dtype* pt = top[0]->cpu_diff();
  Dtype* pa = bottom[0]->mutable_cpu_diff();
  Dtype* pb = bottom[1]->mutable_cpu_diff();
  for (int i = 0; i < num; ++i) {
    caffe_cpu_scale(dim, -pt[i], pd, pa);
    caffe_cpu_scale(dim, pt[i], pd, pb);
    pd += dim;
    pa += dim;
    pb += dim;
  }
}
template <typename Dtype>
void TripletLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[0]) {
    // Dtype scalar = top[0]->cpu_diff()[0] / triplet_num_;
    Dtype scalar = top[0]->cpu_diff()[0] / sample_num_;
    caffe_cpu_scale(bottom_diff_.count(), scalar, bottom_diff_.cpu_data(),
        bottom[0]->mutable_cpu_diff());
  }
}
template <typename Dtype>
void TripletLossLayer<Dtype>::ComputeDiff_cpu(const Dtype* x_1,
    const Dtype* x_2, const Dtype x_1_norm, const Dtype x_2_norm,
    const Dtype inner_val, Dtype* x_1_diff) {
  caffe_cpu_scale(feature_dim_, Dtype(1) / (x_1_norm * x_2_norm), x_2,
      x_1_diff);
  Dtype x_1_norm_cubic = x_1_norm * x_1_norm * x_1_norm;
  caffe_cpu_axpby(feature_dim_, -inner_val / (x_1_norm_cubic * x_2_norm), x_1,
      Dtype(1), x_1_diff);
}
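// For reference, ComputeDiff_cpu evaluates the gradient of the cosine
// similarity s(x_1, x_2) = <x_1, x_2> / (||x_1|| * ||x_2||) with respect
// to x_1:
//   ds/dx_1 = x_2 / (||x_1|| ||x_2||) - <x_1, x_2> x_1 / (||x_1||^3 ||x_2||),
// which is exactly the scale + axpby pair above with inner_val = <x_1, x_2>.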
template <typename Dtype>
void GradientScalerLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[0]) {
    const int count = bottom[0]->count();
    const Dtype* top_diff = top[0]->cpu_diff();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    // Pass the gradient through scaled by -coeff_.
    caffe_cpu_scale(count, Dtype(-coeff_), top_diff, bottom_diff);
  }
}
TYPED_TEST(BlobMathTest, TestSumOfSquares) {
  typedef typename TypeParam::Dtype Dtype;

  // Uninitialized Blob should have sum of squares == 0.
  EXPECT_EQ(0, this->blob_->sumsq_data());
  EXPECT_EQ(0, this->blob_->sumsq_diff());
  FillerParameter filler_param;
  filler_param.set_min(-3);
  filler_param.set_max(3);
  UniformFiller<Dtype> filler(filler_param);
  filler.Fill(this->blob_);
  Dtype expected_sumsq = 0;
  const Dtype* data = this->blob_->cpu_data();
  for (int i = 0; i < this->blob_->count(); ++i) {
    expected_sumsq += data[i] * data[i];
  }
  // Do a mutable access on the current device,
  // so that the sumsq computation is done on that device.
  // (Otherwise, this would only check the CPU sumsq implementation.)
  switch (TypeParam::device) {
  case Engine::CPU:
    this->blob_->mutable_cpu_data();
    break;
  case Engine::GPU:
    this->blob_->mutable_gpu_data();
    break;
  default:
    LOG(FATAL) << "Unknown device: " << TypeParam::device;
  }
  EXPECT_NEAR(expected_sumsq, this->blob_->sumsq_data(),
              this->epsilon_ * expected_sumsq);
  EXPECT_EQ(0, this->blob_->sumsq_diff());

  // Check sumsq_diff too.
  const Dtype kDiffScaleFactor = 7;
  caffe_cpu_scale(this->blob_->count(), kDiffScaleFactor, data,
                  this->blob_->mutable_cpu_diff());
  switch (TypeParam::device) {
  case Engine::CPU:
    this->blob_->mutable_cpu_diff();
    break;
  case Engine::GPU:
    this->blob_->mutable_gpu_diff();
    break;
  default:
    LOG(FATAL) << "Unknown device: " << TypeParam::device;
  }
  EXPECT_NEAR(expected_sumsq, this->blob_->sumsq_data(),
              this->epsilon_ * expected_sumsq);
  const Dtype expected_sumsq_diff =
      expected_sumsq * kDiffScaleFactor * kDiffScaleFactor;
  EXPECT_NEAR(expected_sumsq_diff, this->blob_->sumsq_diff(),
              this->epsilon_ * expected_sumsq_diff);
}
template <typename Dtype>
void ScalarLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[1]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    const Dtype* bottom_data = bottom[0]->cpu_data();
    // Hack: store big eltwise product in bottom[0] diff, except in the special
    // case where this layer itself does the eltwise product, in which case we
    // can store it directly in the scalar diff, and we're done.
    const bool is_eltwise = (inner_dim_ == 1 && outer_dim_ == 1);
    Dtype* product = is_eltwise ?
        bottom[1]->mutable_cpu_diff() : bottom[0]->mutable_cpu_diff();
    caffe_mul(top[0]->count(), top_diff, bottom_data, product);
    if (!is_eltwise) {
      Dtype* sum_result = NULL;
      if (inner_dim_ == 1) {
        sum_result = product;
      } else if (sum_result_.count() == 1) {
        const Dtype* sum_mult = sum_multiplier_.cpu_data();
        Dtype* scalar_diff = bottom[1]->mutable_cpu_diff();
        *scalar_diff = caffe_cpu_dot(inner_dim_, product, sum_mult);
      } else {
        const Dtype* sum_mult = sum_multiplier_.cpu_data();
        sum_result = (outer_dim_ == 1) ?
            bottom[1]->mutable_cpu_diff() : sum_result_.mutable_cpu_data();
        caffe_cpu_gemv(CblasNoTrans, sum_result_.count(), inner_dim_,
                       Dtype(1), product, sum_mult, Dtype(0), sum_result);
      }
      if (outer_dim_ != 1) {
        const Dtype* sum_mult = sum_multiplier_.cpu_data();
        Dtype* scalar_diff = bottom[1]->mutable_cpu_diff();
        if (scalar_dim_ == 1) {
          *scalar_diff = caffe_cpu_dot(outer_dim_, sum_mult, sum_result);
        } else {
          caffe_cpu_gemv(CblasTrans, outer_dim_, scalar_dim_,
                         Dtype(1), sum_result, sum_mult, Dtype(0),
                         scalar_diff);
        }
      }
    }
  }
  if (propagate_down[0]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    const Dtype* scalar_data = bottom[1]->cpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    for (int n = 0; n < outer_dim_; ++n) {
      for (int d = 0; d < scalar_dim_; ++d) {
        const Dtype factor = scalar_data[d];
        caffe_cpu_scale(inner_dim_, factor, top_diff, bottom_diff);
        bottom_diff += inner_dim_;
        top_diff += inner_dim_;
      }
    }
  }
}
template <typename Dtype>
void EltwiseLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  const int* mask = NULL;
  const int count = top[0]->count();
  const Dtype* top_data = top[0]->cpu_data();
  const Dtype* top_diff = top[0]->cpu_diff();
  for (int i = 0; i < bottom.size(); ++i) {
    if (propagate_down[i]) {
      const Dtype* bottom_data = bottom[i]->cpu_data();
      Dtype* bottom_diff = bottom[i]->mutable_cpu_diff();
      switch (op_) {
      case EltwiseParameter_EltwiseOp_PROD:
        if (stable_prod_grad_) {
          bool initialized = false;
          for (int j = 0; j < bottom.size(); ++j) {
            if (i == j) { continue; }
            if (!initialized) {
              caffe_copy(count, bottom[j]->cpu_data(), bottom_diff);
              initialized = true;
            } else {
              caffe_mul(count, bottom[j]->cpu_data(), bottom_diff,
                        bottom_diff);
            }
          }
        } else {
          caffe_div(count, top_data, bottom_data, bottom_diff);
        }
        caffe_mul(count, bottom_diff, top_diff, bottom_diff);
        break;
      case EltwiseParameter_EltwiseOp_SUM:
        if (coeffs_[i] == Dtype(1)) {
          caffe_copy(count, top_diff, bottom_diff);
        } else {
          caffe_cpu_scale(count, coeffs_[i], top_diff, bottom_diff);
        }
        break;
      case EltwiseParameter_EltwiseOp_MAX:
        mask = max_idx_.cpu_data();
        for (int index = 0; index < count; ++index) {
          Dtype gradient = 0;
          if (mask[index] == i) {
            gradient += top_diff[index];
          }
          bottom_diff[index] = gradient;
        }
        break;
      default:
        LOG(FATAL) << "Unknown elementwise operation.";
      }
    }
  }
}
template <typename Dtype>
void ScalarLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  const Dtype* scalar_data = bottom[1]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  for (int n = 0; n < outer_dim_; ++n) {
    for (int d = 0; d < scalar_dim_; ++d) {
      const Dtype factor = scalar_data[d];
      caffe_cpu_scale(inner_dim_, factor, bottom_data, top_data);
      bottom_data += inner_dim_;
      top_data += inner_dim_;
    }
  }
}
template <typename Dtype>
void ExpLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const int count = bottom[0]->count();
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  if (inner_scale_ == Dtype(1)) {
    caffe_exp(count, bottom_data, top_data);
  } else {
    caffe_cpu_scale(count, inner_scale_, bottom_data, top_data);
    caffe_exp(count, top_data, top_data);
  }
  if (outer_scale_ != Dtype(1)) {
    caffe_scal(count, outer_scale_, top_data);
  }
}
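// Net effect of the branches above: top = outer_scale_ * exp(inner_scale_ *
// bottom), with either multiplication skipped when its factor is exactly 1.
// As a worked example, a configuration where inner_scale_ ends up as log(2)
// and outer_scale_ as 1 makes the layer compute top = 2^bottom.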
template <typename Dtype>
void ReductionLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (!propagate_down[0]) { return; }
  // Get bottom_data, if needed.
  const Dtype* bottom_data = NULL;
  switch (op_) {
  // Operations that don't need bottom_data
  case ReductionParameter_ReductionOp_SUM:
  case ReductionParameter_ReductionOp_MEAN:
    break;
  // Operations that need bottom_data
  case ReductionParameter_ReductionOp_ASUM:
  case ReductionParameter_ReductionOp_SUMSQ:
    bottom_data = bottom[0]->cpu_data();
    break;
  default:
    LOG(FATAL) << "Unknown reduction op: "
        << ReductionParameter_ReductionOp_Name(op_);
  }
  const Dtype* top_diff = top[0]->cpu_diff();
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
  for (int i = 0; i < num_; ++i) {
    Dtype bottom_coeff = (*top_diff) * coeff_;
    if (op_ == ReductionParameter_ReductionOp_MEAN) {
      bottom_coeff /= dim_;
    }
    switch (op_) {
    case ReductionParameter_ReductionOp_SUM:
    case ReductionParameter_ReductionOp_MEAN:
      caffe_set(dim_, bottom_coeff, bottom_diff);
      break;
    case ReductionParameter_ReductionOp_ASUM:
      caffe_cpu_sign(dim_, bottom_data, bottom_diff);
      caffe_scal(dim_, bottom_coeff, bottom_diff);
      break;
    case ReductionParameter_ReductionOp_SUMSQ:
      caffe_cpu_scale(dim_, 2 * bottom_coeff, bottom_data, bottom_diff);
      break;
    default:
      LOG(FATAL) << "Unknown reduction op: "
          << ReductionParameter_ReductionOp_Name(op_);
    }
    bottom_data += dim_;
    bottom_diff += dim_;
    ++top_diff;
  }
}
template <typename Dtype>
void EuclideanSimilarityLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  int num = bottom[0]->num();
  int count = bottom[0]->count();
  int dim = count / num;
  caffe_sub(count, bottom[0]->cpu_data(), bottom[1]->cpu_data(),
      diff_.mutable_cpu_data());
  Dtype* sim = top[0]->mutable_cpu_data();
  const Dtype* pd = diff_.cpu_data();
  for (int i = 0; i < num; ++i) {
    sim[i] = caffe_cpu_dot(dim, pd, pd);
    pd += dim;
  }
  // The similarity is the negated squared Euclidean distance.
  caffe_cpu_scale(num, Dtype(-1.0), sim, sim);
}
template <typename Dtype>
void ScaleLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (bias_layer_ &&
      this->param_propagate_down_[this->param_propagate_down_.size() - 1]) {
    bias_layer_->Backward(top, bias_propagate_down_, bias_bottom_vec_);
  }
  const bool scale_param = (bottom.size() == 1);
  Blob<Dtype>* scale = scale_param ? this->blobs_[0].get() : bottom[1];
  if ((!scale_param && propagate_down[1]) ||
      (scale_param && this->param_propagate_down_[0])) {
    const Dtype* top_diff = top[0]->cpu_diff();
    const bool in_place = (bottom[0] == top[0]);
    const Dtype* bottom_data = (in_place ? &temp_ : bottom[0])->cpu_data();
    // Hack: store big eltwise product in bottom[0] diff, except in the special
    // case where this layer itself does the eltwise product, in which case we
    // can store it directly in the scale diff, and we're done.
    // If we're computing in-place (and not doing eltwise computation), this
    // hack doesn't work and we store the product in temp_.
    const bool is_eltwise = (bottom[0]->count() == scale->count());
    Dtype* product = (is_eltwise ? scale->mutable_cpu_diff() :
        (in_place ? temp_.mutable_cpu_data() : bottom[0]->mutable_cpu_diff()));
    caffe_mul(top[0]->count(), top_diff, bottom_data, product);
    if (!is_eltwise) {
      Dtype* sum_result = NULL;
      if (inner_dim_ == 1) {
        sum_result = product;
      } else if (sum_result_.count() == 1) {
        const Dtype* sum_mult = sum_multiplier_.cpu_data();
        Dtype* scale_diff = scale->mutable_cpu_diff();
        if (scale_param) {
          Dtype result = caffe_cpu_dot(inner_dim_, product, sum_mult);
          *scale_diff += result;
        } else {
          *scale_diff = caffe_cpu_dot(inner_dim_, product, sum_mult);
        }
      } else {
        const Dtype* sum_mult = sum_multiplier_.cpu_data();
        sum_result = (outer_dim_ == 1) ?
            scale->mutable_cpu_diff() : sum_result_.mutable_cpu_data();
        caffe_cpu_gemv(CblasNoTrans, sum_result_.count(), inner_dim_,
                       Dtype(1), product, sum_mult, Dtype(0), sum_result);
      }
      if (outer_dim_ != 1) {
        const Dtype* sum_mult = sum_multiplier_.cpu_data();
        Dtype* scale_diff = scale->mutable_cpu_diff();
        if (scale_dim_ == 1) {
          if (scale_param) {
            Dtype result = caffe_cpu_dot(outer_dim_, sum_mult, sum_result);
            *scale_diff += result;
          } else {
            *scale_diff = caffe_cpu_dot(outer_dim_, sum_mult, sum_result);
          }
        } else {
          caffe_cpu_gemv(CblasTrans, outer_dim_, scale_dim_,
                         Dtype(1), sum_result, sum_mult, Dtype(scale_param),
                         scale_diff);
        }
      }
    }
  }
  if (propagate_down[0]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    const Dtype* scale_data = scale->cpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    for (int n = 0; n < outer_dim_; ++n) {
      for (int d = 0; d < scale_dim_; ++d) {
        const Dtype factor = scale_data[d];
        caffe_cpu_scale(inner_dim_, factor, top_diff, bottom_diff);
        bottom_diff += inner_dim_;
        top_diff += inner_dim_;
      }
    }
  }
}
template <typename Dtype>
void BatchNormLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  int num = bottom[0]->shape(0);
  int spatial_dim = bottom[0]->count() / (bottom[0]->shape(0) * channels_);

  if (bottom[0] != top[0]) {
    caffe_copy(bottom[0]->count(), bottom_data, top_data);
  }

  if (use_global_stats_) {
    // Use the stored mean/variance estimates.
    const Dtype scale_factor = this->blobs_[2]->cpu_data()[0] == 0 ?
        0 : 1 / this->blobs_[2]->cpu_data()[0];
    caffe_cpu_scale(variance_.count(), scale_factor,
        this->blobs_[0]->cpu_data(), mean_.mutable_cpu_data());
    caffe_cpu_scale(variance_.count(), scale_factor,
        this->blobs_[1]->cpu_data(), variance_.mutable_cpu_data());
  } else {
    // Compute mean.
    caffe_cpu_gemv<Dtype>(CblasNoTrans, channels_ * num, spatial_dim,
        1. / (num * spatial_dim), bottom_data,
        spatial_sum_multiplier_.cpu_data(), 0.,
        num_by_chans_.mutable_cpu_data());
    caffe_cpu_gemv<Dtype>(CblasTrans, num, channels_, 1.,
        num_by_chans_.cpu_data(), batch_sum_multiplier_.cpu_data(), 0.,
        mean_.mutable_cpu_data());
  }

  // Subtract mean.
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, channels_, 1, 1,
      batch_sum_multiplier_.cpu_data(), mean_.cpu_data(), 0.,
      num_by_chans_.mutable_cpu_data());
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, channels_ * num,
      spatial_dim, 1, -1, num_by_chans_.cpu_data(),
      spatial_sum_multiplier_.cpu_data(), 1., top_data);

  if (!use_global_stats_) {
    // Compute variance using var(X) = E((X-EX)^2).
    caffe_powx(top[0]->count(), top_data, Dtype(2),
        temp_.mutable_cpu_data());  // (X-EX)^2
    caffe_cpu_gemv<Dtype>(CblasNoTrans, channels_ * num, spatial_dim,
        1. / (num * spatial_dim), temp_.cpu_data(),
        spatial_sum_multiplier_.cpu_data(), 0.,
        num_by_chans_.mutable_cpu_data());
    caffe_cpu_gemv<Dtype>(CblasTrans, num, channels_, 1.,
        num_by_chans_.cpu_data(), batch_sum_multiplier_.cpu_data(), 0.,
        variance_.mutable_cpu_data());  // E((X-EX)^2)

    // Compute and save moving average.
    this->blobs_[2]->mutable_cpu_data()[0] *= moving_average_fraction_;
    this->blobs_[2]->mutable_cpu_data()[0] += 1;
    caffe_cpu_axpby(mean_.count(), Dtype(1), mean_.cpu_data(),
        moving_average_fraction_, this->blobs_[0]->mutable_cpu_data());
    int m = bottom[0]->count() / channels_;
    Dtype bias_correction_factor = m > 1 ? Dtype(m) / (m - 1) : 1;
    caffe_cpu_axpby(variance_.count(), bias_correction_factor,
        variance_.cpu_data(), moving_average_fraction_,
        this->blobs_[1]->mutable_cpu_data());
  }

  // Normalize variance.
  caffe_add_scalar(variance_.count(), eps_, variance_.mutable_cpu_data());
  caffe_powx(variance_.count(), variance_.cpu_data(), Dtype(0.5),
      variance_.mutable_cpu_data());

  // Replicate variance to input size.
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, channels_, 1, 1,
      batch_sum_multiplier_.cpu_data(), variance_.cpu_data(), 0.,
      num_by_chans_.mutable_cpu_data());
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, channels_ * num,
      spatial_dim, 1, 1., num_by_chans_.cpu_data(),
      spatial_sum_multiplier_.cpu_data(), 0., temp_.mutable_cpu_data());
  caffe_div(temp_.count(), top_data, temp_.cpu_data(), top_data);
  // TODO(cdoersch): The caching is only needed because later in-place layers
  // might clobber the data. Can we skip this if they won't?
  caffe_copy(x_norm_.count(), top_data, x_norm_.mutable_cpu_data());
}
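// Summary of the arithmetic above, per channel c:
//   top = (bottom - mean_c) / sqrt(var_c + eps_),
// where mean_c and var_c come either from the stored moving averages
// (use_global_stats_) or from batch statistics computed with the gemv
// reductions; the gemm calls broadcast the per-channel values back to the
// full num x channels x spatial layout.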
template <typename Dtype>
void EltwiseLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  const int* mask = NULL;
  const int count = top[0]->count();
  const Dtype* top_data = top[0]->cpu_data();
  const Dtype* top_diff = top[0]->cpu_diff();
  if (broadcast_) {
    // Determine which input was broadcast: the one whose shape is smaller
    // along some axis.
    bool broadcasted[2];
    broadcasted[0] = broadcasted[1] = false;
    for (int k = 0; k < 4; ++k) {
      if (bottom[0]->shape()[k] > bottom[1]->shape()[k]) broadcasted[1] = true;
      if (bottom[0]->shape()[k] < bottom[1]->shape()[k]) broadcasted[0] = true;
    }
    int i = 0, j = 1;  // i -> not broadcasted, j -> broadcasted
    if (broadcasted[0]) { i = 1; j = 0; }
    int dima[4], dimb[4];
    const Dtype* bot_data = bottom[i]->cpu_data();
    const Dtype* bot_data_brd = bottom[j]->cpu_data();
    Dtype* bot_diff = bottom[i]->mutable_cpu_diff();
    Dtype* bot_diff_brd = bottom[j]->mutable_cpu_diff();
    for (int n = 0; n < 4; ++n) dima[n] = bottom[i]->shape()[n];
    for (int n = 0; n < 4; ++n) dimb[n] = bottom[j]->shape()[n];
    switch (op_) {
    case EltwiseParameter_EltwiseOp_PROD:
      if (propagate_down[j]) {
        int n = 1;
        for (int x = 0; x < 4; ++x) n *= dima[x];
        caffe_mul<Dtype>(n, top_diff, bot_data, bot_diff);
        caffe_sum_reduce<Dtype>(dima, dimb, bot_diff, bot_diff_brd);
        caffe_set(n, Dtype(0), bot_diff);
      }
      if (propagate_down[i]) {
        caffe_mul_broadcast<Dtype>(dima, dimb, top_diff, bot_data_brd,
            bot_diff);
      }
      break;
    case EltwiseParameter_EltwiseOp_SUM:
      if (propagate_down[j]) {
        caffe_sum_reduce<Dtype>(dima, dimb, top_diff, bot_diff_brd);
      }
      if (propagate_down[i]) {
        int n = 1;
        for (int x = 0; x < 4; ++x) n *= dima[x];
        caffe_copy<Dtype>(n, top_diff, bot_diff);
      }
      break;
    default:
      LOG(FATAL) << "Unknown elementwise operation.";
    }
  } else {
    for (int i = 0; i < bottom.size(); ++i) {
      if (propagate_down[i]) {
        const Dtype* bottom_data = bottom[i]->cpu_data();
        Dtype* bottom_diff = bottom[i]->mutable_cpu_diff();
        switch (op_) {
        case EltwiseParameter_EltwiseOp_PROD:
          if (stable_prod_grad_) {
            bool initialized = false;
            for (int j = 0; j < bottom.size(); ++j) {
              if (i == j) { continue; }
              if (!initialized) {
                caffe_copy(count, bottom[j]->cpu_data(), bottom_diff);
                initialized = true;
              } else {
                caffe_mul(count, bottom[j]->cpu_data(), bottom_diff,
                          bottom_diff);
              }
            }
          } else {
            caffe_div(count, top_data, bottom_data, bottom_diff);
          }
          caffe_mul(count, bottom_diff, top_diff, bottom_diff);
          break;
        case EltwiseParameter_EltwiseOp_SUM:
          if (coeffs_[i] == Dtype(1)) {
            caffe_copy(count, top_diff, bottom_diff);
          } else {
            caffe_cpu_scale(count, coeffs_[i], top_diff, bottom_diff);
          }
          break;
        case EltwiseParameter_EltwiseOp_MAX:
          mask = max_idx_.cpu_data();
          for (int index = 0; index < count; ++index) {
            Dtype gradient = 0;
            if (mask[index] == i) {
              gradient += top_diff[index];
            }
            bottom_diff[index] = gradient;
          }
          break;
        default:
          LOG(FATAL) << "Unknown elementwise operation.";
        }
      }
    }
  }
}
template <typename Dtype>
void AdaDeltaSolver<Dtype>::ComputeUpdateValue(int param_id, Dtype rate) {
  const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
  const vector<float>& net_params_lr = this->net_->params_lr();
  Dtype delta = this->param_.delta();
  Dtype momentum = this->param_.momentum();
  Dtype local_rate = rate * net_params_lr[param_id];
  size_t update_history_offset = net_params.size();
  switch (Caffe::mode()) {
  case Caffe::CPU: {
    // compute square of gradient in update
    caffe_powx(net_params[param_id]->count(),
        net_params[param_id]->cpu_diff(), Dtype(2),
        this->update_[param_id]->mutable_cpu_data());
    // update history of gradients
    caffe_cpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum,
        this->update_[param_id]->cpu_data(), momentum,
        this->history_[param_id]->mutable_cpu_data());
    // add delta to history to guard against dividing by zero later
    caffe_set(net_params[param_id]->count(), delta,
        this->temp_[param_id]->mutable_cpu_data());
    caffe_add(net_params[param_id]->count(),
        this->temp_[param_id]->cpu_data(),
        this->history_[update_history_offset + param_id]->cpu_data(),
        this->update_[param_id]->mutable_cpu_data());
    caffe_add(net_params[param_id]->count(),
        this->temp_[param_id]->cpu_data(),
        this->history_[param_id]->cpu_data(),
        this->temp_[param_id]->mutable_cpu_data());
    // divide history of updates by history of gradients
    caffe_div(net_params[param_id]->count(),
        this->update_[param_id]->cpu_data(),
        this->temp_[param_id]->cpu_data(),
        this->update_[param_id]->mutable_cpu_data());
    // jointly compute the RMS of both the update and gradient history
    caffe_powx(net_params[param_id]->count(),
        this->update_[param_id]->cpu_data(), Dtype(0.5),
        this->update_[param_id]->mutable_cpu_data());
    // compute the update
    caffe_mul(net_params[param_id]->count(),
        net_params[param_id]->cpu_diff(),
        this->update_[param_id]->cpu_data(),
        net_params[param_id]->mutable_cpu_diff());
    // compute square of update
    caffe_powx(net_params[param_id]->count(),
        net_params[param_id]->cpu_diff(), Dtype(2),
        this->update_[param_id]->mutable_cpu_data());
    // update history of updates
    caffe_cpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum,
        this->update_[param_id]->cpu_data(), momentum,
        this->history_[update_history_offset + param_id]->mutable_cpu_data());
    // apply learning rate
    caffe_cpu_scale(net_params[param_id]->count(), local_rate,
        net_params[param_id]->cpu_diff(),
        net_params[param_id]->mutable_cpu_diff());
    break;
  }
  case Caffe::GPU: {
#ifndef CPU_ONLY
    adadelta_update_gpu(net_params[param_id]->count(),
        net_params[param_id]->mutable_gpu_diff(),
        this->history_[param_id]->mutable_gpu_data(),
        this->history_[update_history_offset + param_id]->mutable_gpu_data(),
        momentum, delta, local_rate);
#else
    NO_GPU;
#endif
    break;
  }
  default:
    LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
  }
}
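// For reference, the CPU branch above implements the AdaDelta update
//   E[g^2]_t  = m * E[g^2]_{t-1}  + (1 - m) * g_t^2
//   dx_t      = sqrt((E[dx^2]_{t-1} + delta) / (E[g^2]_t + delta)) * g_t
//   E[dx^2]_t = m * E[dx^2]_{t-1} + (1 - m) * dx_t^2
// with m = momentum; history_[param_id] holds E[g^2] and
// history_[update_history_offset + param_id] holds E[dx^2]. The descent
// sign is applied later by the solver's generic update step.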
TYPED_TEST(BlobMathTest, TestScaleData) {
  typedef typename TypeParam::Dtype Dtype;

  EXPECT_EQ(0, this->blob_->asum_data());
  EXPECT_EQ(0, this->blob_->asum_diff());
  FillerParameter filler_param;
  filler_param.set_min(-3);
  filler_param.set_max(3);
  UniformFiller<Dtype> filler(filler_param);
  filler.Fill(this->blob_);
  const Dtype asum_before_scale = this->blob_->asum_data();
  // Do a mutable access on the current device,
  // so that the asum computation is done on that device.
  // (Otherwise, this would only check the CPU asum implementation.)
  switch (TypeParam::device) {
  case Engine::CPU:
    this->blob_->mutable_cpu_data();
    break;
  case Engine::GPU:
    this->blob_->mutable_gpu_data();
    break;
  default:
    LOG(FATAL) << "Unknown device: " << TypeParam::device;
  }
  const Dtype kDataScaleFactor = 3;
  this->blob_->scale_data(kDataScaleFactor);
  EXPECT_NEAR(asum_before_scale * kDataScaleFactor, this->blob_->asum_data(),
              this->epsilon_ * asum_before_scale * kDataScaleFactor);
  EXPECT_EQ(0, this->blob_->asum_diff());

  // Check scale_diff too.
  const Dtype kDataToDiffScaleFactor = 7;
  const Dtype* data = this->blob_->cpu_data();
  caffe_cpu_scale(this->blob_->count(), kDataToDiffScaleFactor, data,
                  this->blob_->mutable_cpu_diff());
  const Dtype expected_asum_before_scale =
      asum_before_scale * kDataScaleFactor;
  EXPECT_NEAR(expected_asum_before_scale, this->blob_->asum_data(),
              this->epsilon_ * expected_asum_before_scale);
  const Dtype expected_diff_asum_before_scale =
      asum_before_scale * kDataScaleFactor * kDataToDiffScaleFactor;
  EXPECT_NEAR(expected_diff_asum_before_scale, this->blob_->asum_diff(),
              this->epsilon_ * expected_diff_asum_before_scale);
  switch (TypeParam::device) {
  case Engine::CPU:
    this->blob_->mutable_cpu_diff();
    break;
  case Engine::GPU:
    this->blob_->mutable_gpu_diff();
    break;
  default:
    LOG(FATAL) << "Unknown device: " << TypeParam::device;
  }
  const Dtype kDiffScaleFactor = 3;
  this->blob_->scale_diff(kDiffScaleFactor);
  EXPECT_NEAR(asum_before_scale * kDataScaleFactor, this->blob_->asum_data(),
              this->epsilon_ * asum_before_scale * kDataScaleFactor);
  const Dtype expected_diff_asum =
      expected_diff_asum_before_scale * kDiffScaleFactor;
  EXPECT_NEAR(expected_diff_asum, this->blob_->asum_diff(),
              this->epsilon_ * expected_diff_asum);
}
template <typename Dtype>
void CoupledClusterLossLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  pos_ids = std::vector<std::vector<int> >(group_num, std::vector<int>());
  neg_ids = std::vector<std::vector<int> >(group_num, std::vector<int>());
  pos_backward = std::vector<bool>(group_num * N, false);
  neg_backward = std::vector<bool>(group_num * N, false);
  const Dtype* feat_ptr = bottom[0]->cpu_data();
  const Dtype* label_ptr = bottom[1]->cpu_data();
  Dtype* diff_ptr_ = diff_.mutable_cpu_data();
  Dtype loss(0);
  caffe_set(feat_len * group_num, Dtype(0), pos_center_.mutable_cpu_data());
  int cnt = 0;
  /* i -> group index */
  for (int i = 0; i < group_num; ++i) {
    /* Search for the anchor id: the first label that occurs twice. */
    std::set<Dtype> labels;
    Dtype anchor_id = -1;
    for (int j = 0; j < N; ++j) {
      Dtype tmp = label_ptr[N * i + j];
      if (labels.count(tmp) > 0) {
        anchor_id = tmp;
        break;
      } else {
        labels.insert(tmp);
      }
    }
    // CHECK_NE(anchor_id, -1);
    /* Collect the positive and negative ids; accumulate the center of the
     * positive samples. */
    for (int j = 0; j < N; ++j) {
      if (label_ptr[i * N + j] == anchor_id) {
        pos_ids[i].push_back(j);
        caffe_add(feat_len, feat_ptr + feat_len * (i * N + j),
            pos_center_.mutable_cpu_data() + feat_len * i,
            pos_center_.mutable_cpu_data() + feat_len * i);
      } else {
        neg_ids[i].push_back(j);
      }
    }
    caffe_cpu_scale(feat_len, Dtype(1) / pos_ids[i].size(),
        pos_center_.mutable_cpu_data() + feat_len * i,
        pos_center_.mutable_cpu_data() + feat_len * i);
    if (neg_ids[i].size() == 0 || pos_ids[i].size() <= 1) continue;
    Dtype pos_mdist = Dtype(0);
    Dtype neg_min_val = -1;
    Dtype pos_max_val = -1;
    for (int j = 0; j < N; ++j) {
      // f[j] - center
      caffe_sub(feat_len, feat_ptr + feat_len * (i * N + j),
          pos_center_.cpu_data() + feat_len * i,
          diff_ptr_ + feat_len * (i * N + j));
      if (scale != 1) {
        caffe_cpu_scale(feat_len, scale, diff_ptr_ + feat_len * (i * N + j),
            diff_ptr_ + feat_len * (i * N + j));
      }
      Dtype d = caffe_cpu_dot(feat_len, diff_ptr_ + feat_len * (i * N + j),
          diff_ptr_ + feat_len * (i * N + j));
      if (log_flag)
        LOG(INFO) << "i " << i << ", j " << j << ", d " << d;
      dist_sq_.mutable_cpu_data()[i * N + j] = d;
      if (std::count(neg_ids[i].begin(), neg_ids[i].end(), j) > 0 &&
          (neg_min_val == -1 || d < neg_min_val)) {
        neg_min_val = d;
      } else if (std::count(neg_ids[i].begin(), neg_ids[i].end(), j) == 0 &&
          (pos_max_val == -1 || d > pos_max_val)) {
        pos_max_val = d;
      }
    }
    for (int j = 0; j < N; ++j) {
      if (std::count(neg_ids[i].begin(), neg_ids[i].end(), j) > 0) {
        Dtype d = dist_sq_.cpu_data()[i * N + j];
        Dtype mdist = std::max(-d + margin + pos_max_val, Dtype(0));
        if (log_flag)
          LOG(INFO) << "j=" << j << ", d=" << d << ", pos_max_val="
              << pos_max_val << ", mdist=" << mdist;
        if (mdist > 0) neg_backward[i * N + j] = true;
      } else {
        Dtype d = dist_sq_.cpu_data()[i * N + j];
        Dtype mdist = std::max(d + margin - neg_min_val, Dtype(0));
        if (log_flag)
          LOG(INFO) << "j=" << j << ", d=" << d << ", neg_min_val="
              << neg_min_val << ", mdist=" << mdist;
        if (mdist > 0) pos_backward[i * N + j] = true;
        pos_mdist += mdist;
      }
    }
    /* Average the penalty over the positive samples. */
    pos_mdist /= pos_ids[i].size();
    // pos_mdist *= 2;
    if (log_flag)
      LOG(INFO) << "pos_mdist " << pos_mdist
          << ", neg_min_val " << neg_min_val;
    CHECK_GE(pos_ids[i].size(), 2);
    CHECK_GE(neg_ids[i].size(), 1);
    loss += pos_mdist;
    ++cnt;
  }
  loss = loss / cnt;
  top[0]->mutable_cpu_data()[0] = loss;
}