void DotProductSimilarityLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  int count = bottom[0]->count();
  int num = bottom[0]->num();
  int dim = count / num;
  const Dtype * top_diff = top[0]->cpu_diff();
  for (int i=0; i<num; ++i) {
    caffe_cpu_scale(dim, top_diff[i],
        bottom[0]->cpu_data() + bottom[0]->offset(i),
        bottom[1]->mutable_cpu_diff() + bottom[1]->offset(i));
    caffe_cpu_scale(dim, top_diff[i],
        bottom[1]->cpu_data() + bottom[1]->offset(i),
        bottom[0]->mutable_cpu_diff() + bottom[0]->offset(i));
  }
}
Ejemplo n.º 2
0
void ScaleLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  if (bottom[0] == top[0]) {
    // In-place computation; need to store bottom data before overwriting it.
    // Note that this is only necessary for Backward; we could skip this if not
    // doing Backward, but Caffe currently provides no way of knowing whether
    // we'll need to do Backward at the time of the Forward call.
    caffe_copy(bottom[0]->count(), bottom[0]->cpu_data(),
               temp_.mutable_cpu_data());
  }
  const Dtype* scale_data =
      ((bottom.size() > 1) ? bottom[1] : this->blobs_[0].get())->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  for (int n = 0; n < outer_dim_; ++n) {
    for (int d = 0; d < scale_dim_; ++d) {
      const Dtype factor = scale_data[d];
      caffe_cpu_scale(inner_dim_, factor, bottom_data, top_data);
      bottom_data += inner_dim_;
      top_data += inner_dim_;
    }
  }
  if (bias_layer_) {
    bias_layer_->Forward(bias_bottom_vec_, top);
  }
}
Ejemplo n.º 3
0
void NormalizeLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  const Dtype* top_diff = top[0]->cpu_diff();
  const Dtype* top_data = top[0]->cpu_data();
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
  int n = top[0]->num();
  int d = top[0]->count() / n;
  for (int i=0; i<n; ++i) {
    Dtype a = caffe_cpu_dot(d, top_data+i*d, top_diff+i*d);
    caffe_cpu_scale(d, a, top_data+i*d, bottom_diff+i*d);
    caffe_sub(d, top_diff+i*d, bottom_diff+i*d, bottom_diff+i*d);
    a = caffe_cpu_dot(d, bottom_data+i*d, bottom_data+i*d);
    caffe_cpu_scale(d, Dtype(pow(a, -0.5)), bottom_diff+i*d, bottom_diff+i*d);
  }
}
Ejemplo n.º 4
0
void EltwiseLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
  if (propagate_down) {
    const int count = top[0]->count();
    const Dtype* top_data = top[0]->cpu_data();
    const Dtype* top_diff = top[0]->cpu_diff();
    for (int i = 0; i < bottom->size(); ++i) {
      const Dtype* bottom_data = (*bottom)[i]->cpu_data();
      Dtype* bottom_diff = (*bottom)[i]->mutable_cpu_diff();
      switch (op_) {
      case EltwiseParameter_EltwiseOp_PROD:
        caffe_div(count, top_data, bottom_data, bottom_diff);
        caffe_mul(count, bottom_diff, top_diff, bottom_diff);
        break;
      case EltwiseParameter_EltwiseOp_SUM:
        if (coeffs_[i] == Dtype(1)) {
          caffe_copy(count, top_diff, bottom_diff);
        } else {
          caffe_cpu_scale(count, coeffs_[i], top_diff, bottom_diff);
        }
        break;
      default:
        LOG(FATAL) << "Unknown elementwise operation.";
      }
    }
  }
}
Ejemplo n.º 5
0
void EuclideanSimilarityLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  int num = bottom[0]->num();
  int count = bottom[0]->count();
  int dim = count / num;

  const Dtype * pd = diff_.cpu_data();
  const Dtype * pt = top[0]->cpu_diff();
  Dtype * pa = bottom[0]->mutable_cpu_diff();
  Dtype * pb = bottom[1]->mutable_cpu_diff();

  for (int i=0; i<num; ++i) {
    caffe_cpu_scale(dim, -pt[i], pd, pa);
    caffe_cpu_scale(dim,  pt[i], pd, pb);
    pd += dim; pa += dim; pb += dim;
  }
}
void TripletLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
  const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[0]) {
    //Dtype scalar = top[0]->cpu_diff()[0] / triplet_num_;
    Dtype scalar = top[0]->cpu_diff()[0] / sample_num_;
    caffe_cpu_scale(bottom_diff_.count(), scalar, bottom_diff_.cpu_data(),
        bottom[0]->mutable_cpu_diff());
  }
}
void TripletLossLayer<Dtype>::ComputeDiff_cpu(const Dtype *x_1,
  const Dtype *x_2, const Dtype x_1_norm, const Dtype x_2_norm,
  const Dtype inner_val, Dtype *x_1_diff) {
  caffe_cpu_scale(feature_dim_, Dtype(1) / (x_1_norm * x_2_norm),
      x_2, x_1_diff);
  Dtype x_1_norm_cubic = x_1_norm * x_1_norm * x_1_norm;
  caffe_cpu_axpby(feature_dim_, -inner_val / (x_1_norm_cubic * x_2_norm),
      x_1, Dtype(1), x_1_diff);
}
Ejemplo n.º 8
0
void GradientScalerLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[0]) {
    const int count = bottom[0]->count();
    const Dtype* top_diff = top[0]->cpu_diff();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();

    caffe_cpu_scale(count, Dtype(-coeff_), top_diff, bottom_diff);
  }
}
Ejemplo n.º 9
0
TYPED_TEST(BlobMathTest, TestSumOfSquares) {
  typedef typename TypeParam::Dtype Dtype;

  // Uninitialized Blob should have sum of squares == 0.
  EXPECT_EQ(0, this->blob_->sumsq_data());
  EXPECT_EQ(0, this->blob_->sumsq_diff());
  FillerParameter filler_param;
  filler_param.set_min(-3);
  filler_param.set_max(3);
  UniformFiller<Dtype> filler(filler_param);
  filler.Fill(this->blob_);
  Dtype expected_sumsq = 0;
  const Dtype* data = this->blob_->cpu_data();
  for (int i = 0; i < this->blob_->count(); ++i) {
    expected_sumsq += data[i] * data[i];
  }
  // Do a mutable access on the current device,
  // so that the sumsq computation is done on that device.
  // (Otherwise, this would only check the CPU sumsq implementation.)
  switch (TypeParam::device) {
  case Engine::CPU:
    this->blob_->mutable_cpu_data();
    break;
  case Engine::GPU:
    this->blob_->mutable_gpu_data();
    break;
  default:
    LOG(FATAL) << "Unknown device: " << TypeParam::device;
  }
  EXPECT_NEAR(expected_sumsq, this->blob_->sumsq_data(),
              this->epsilon_ * expected_sumsq);
  EXPECT_EQ(0, this->blob_->sumsq_diff());

  // Check sumsq_diff too.
  const Dtype kDiffScaleFactor = 7;
  caffe_cpu_scale(this->blob_->count(), kDiffScaleFactor, data,
                  this->blob_->mutable_cpu_diff());
  switch (TypeParam::device) {
  case Engine::CPU:
    this->blob_->mutable_cpu_diff();
    break;
  case Engine::GPU:
    this->blob_->mutable_gpu_diff();
    break;
  default:
    LOG(FATAL) << "Unknown device: " << TypeParam::device;
  }
  EXPECT_NEAR(expected_sumsq, this->blob_->sumsq_data(),
              this->epsilon_ * expected_sumsq);
  const Dtype expected_sumsq_diff =
      expected_sumsq * kDiffScaleFactor * kDiffScaleFactor;
  EXPECT_NEAR(expected_sumsq_diff, this->blob_->sumsq_diff(),
              this->epsilon_ * expected_sumsq_diff);
}
Ejemplo n.º 10
0
void ScalarLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[1]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    const Dtype* bottom_data = bottom[0]->cpu_data();
    // Hack: store big eltwise product in bottom[0] diff, except in the special
    // case where this layer itself does the eltwise product, in which case we
    // can store it directly in the scalar diff, and we're done.
    const bool is_eltwise = (inner_dim_ == 1 && outer_dim_ == 1);
    Dtype* product = is_eltwise ?
        bottom[1]->mutable_cpu_diff() : bottom[0]->mutable_cpu_diff();
    caffe_mul(top[0]->count(), top_diff, bottom_data, product);
    if (!is_eltwise) {
      Dtype* sum_result = NULL;
      if (inner_dim_ == 1) {
        sum_result = product;
      } else if (sum_result_.count() == 1) {
        const Dtype* sum_mult = sum_multiplier_.cpu_data();
        Dtype* scalar_diff = bottom[1]->mutable_cpu_diff();
        *scalar_diff = caffe_cpu_dot(inner_dim_, product, sum_mult);
      } else {
        const Dtype* sum_mult = sum_multiplier_.cpu_data();
        sum_result = (outer_dim_ == 1) ?
            bottom[1]->mutable_cpu_diff() : sum_result_.mutable_cpu_data();
        caffe_cpu_gemv(CblasNoTrans, sum_result_.count(), inner_dim_,
                       Dtype(1), product, sum_mult, Dtype(0), sum_result);
      }
      if (outer_dim_ != 1) {
        const Dtype* sum_mult = sum_multiplier_.cpu_data();
        Dtype* scalar_diff = bottom[1]->mutable_cpu_diff();
        if (scalar_dim_ == 1) {
          *scalar_diff = caffe_cpu_dot(outer_dim_, sum_mult, sum_result);
        } else {
          caffe_cpu_gemv(CblasTrans, outer_dim_, scalar_dim_,
                         Dtype(1), sum_result, sum_mult, Dtype(0), scalar_diff);
        }
      }
    }
  }
  if (propagate_down[0]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    const Dtype* scalar_data = bottom[1]->cpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    for (int n = 0; n < outer_dim_; ++n) {
      for (int d = 0; d < scalar_dim_; ++d) {
        const Dtype factor = scalar_data[d];
        caffe_cpu_scale(inner_dim_, factor, top_diff, bottom_diff);
        bottom_diff += inner_dim_;
        top_diff += inner_dim_;
      }
    }
  }
}
Ejemplo n.º 11
0
void EltwiseLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  const int* mask = NULL;
  const int count = top[0]->count();
  const Dtype* top_data = top[0]->cpu_data();
  const Dtype* top_diff = top[0]->cpu_diff();
  for (int i = 0; i < bottom.size(); ++i) {
    if (propagate_down[i]) {
      const Dtype* bottom_data = bottom[i]->cpu_data();
      Dtype* bottom_diff = bottom[i]->mutable_cpu_diff();
      switch (op_) {
      case EltwiseParameter_EltwiseOp_PROD:
        if (stable_prod_grad_) {
          bool initialized = false;
          for (int j = 0; j < bottom.size(); ++j) {
            if (i == j) { continue; }
            if (!initialized) {
              caffe_copy(count, bottom[j]->cpu_data(), bottom_diff);
              initialized = true;
            } else {
              caffe_mul(count, bottom[j]->cpu_data(), bottom_diff,
                        bottom_diff);
            }
          }
        } else {
          caffe_div(count, top_data, bottom_data, bottom_diff);
        }
        caffe_mul(count, bottom_diff, top_diff, bottom_diff);
        break;
      case EltwiseParameter_EltwiseOp_SUM:
        if (coeffs_[i] == Dtype(1)) {
          caffe_copy(count, top_diff, bottom_diff);
        } else {
          caffe_cpu_scale(count, coeffs_[i], top_diff, bottom_diff);
        }
        break;
      case EltwiseParameter_EltwiseOp_MAX:
        mask = max_idx_.cpu_data();
        for (int index = 0; index < count; ++index) {
          Dtype gradient = 0;
          if (mask[index] == i) {
            gradient += top_diff[index];
          }
          bottom_diff[index] = gradient;
        }
        break;
      default:
        LOG(FATAL) << "Unknown elementwise operation.";
      }
    }
  }
}
Ejemplo n.º 12
0
void ScalarLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  const Dtype* scalar_data = bottom[1]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  for (int n = 0; n < outer_dim_; ++n) {
    for (int d = 0; d < scalar_dim_; ++d) {
      const Dtype factor = scalar_data[d];
      caffe_cpu_scale(inner_dim_, factor, bottom_data, top_data);
      bottom_data += inner_dim_;
      top_data += inner_dim_;
    }
  }
}
Ejemplo n.º 13
0
void ExpLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const int count = bottom[0]->count();
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  if (inner_scale_ == Dtype(1)) {
    caffe_exp(count, bottom_data, top_data);
  } else {
    caffe_cpu_scale(count, inner_scale_, bottom_data, top_data);
    caffe_exp(count, top_data, top_data);
  }
  if (outer_scale_ != Dtype(1)) {
    caffe_scal(count, outer_scale_, top_data);
  }
}
Ejemplo n.º 14
0
void ReductionLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (!propagate_down[0]) { return; }
  // Get bottom_data, if needed.
  const Dtype* bottom_data = NULL;
  switch (op_) {
  // Operations that don't need bottom_data
  case ReductionParameter_ReductionOp_SUM:
  case ReductionParameter_ReductionOp_MEAN:
    break;
  // Operations that need bottom_data
  case ReductionParameter_ReductionOp_ASUM:
  case ReductionParameter_ReductionOp_SUMSQ:
    bottom_data = bottom[0]->cpu_data();
    break;
  default:
    LOG(FATAL) << "Unknown reduction op: "
        << ReductionParameter_ReductionOp_Name(op_);
  }
  const Dtype* top_diff = top[0]->cpu_diff();
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
  for (int i = 0; i < num_; ++i) {
    Dtype bottom_coeff = (*top_diff) * coeff_;
    if (op_ == ReductionParameter_ReductionOp_MEAN) {
      bottom_coeff /= dim_;
    }
    switch (op_) {
    case ReductionParameter_ReductionOp_SUM:
    case ReductionParameter_ReductionOp_MEAN:
      caffe_set(dim_, bottom_coeff, bottom_diff);
      break;
    case ReductionParameter_ReductionOp_ASUM:
      caffe_cpu_sign(dim_, bottom_data, bottom_diff);
      caffe_scal(dim_, bottom_coeff, bottom_diff);
      break;
    case ReductionParameter_ReductionOp_SUMSQ:
      caffe_cpu_scale(dim_, 2 * bottom_coeff, bottom_data, bottom_diff);
      break;
    default:
      LOG(FATAL) << "Unknown reduction op: "
          << ReductionParameter_ReductionOp_Name(op_);
    }
    bottom_data += dim_;
    bottom_diff += dim_;
    ++top_diff;
  }
}
Ejemplo n.º 15
0
void EuclideanSimilarityLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  int num = bottom[0]->num();
  int count = bottom[0]->count();
  int dim = count / num;
  caffe_sub(
      count,
      bottom[0]->cpu_data(),
      bottom[1]->cpu_data(),
      diff_.mutable_cpu_data());
  Dtype * sim = top[0]->mutable_cpu_data();
  const Dtype * pd = diff_.cpu_data();
  for (int i=0; i<num; ++i) {
    sim[i] = caffe_cpu_dot(dim, pd, pd);
    pd += dim;
  }
  caffe_cpu_scale(num, Dtype(-1.0), sim, sim);
}
Ejemplo n.º 16
0
void ScaleLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (bias_layer_ &&
      this->param_propagate_down_[this->param_propagate_down_.size() - 1]) {
    bias_layer_->Backward(top, bias_propagate_down_, bias_bottom_vec_);
  }
  const bool scale_param = (bottom.size() == 1);
  Blob<Dtype>* scale = scale_param ? this->blobs_[0].get() : bottom[1];
  if ((!scale_param && propagate_down[1]) ||
      (scale_param && this->param_propagate_down_[0])) {
    const Dtype* top_diff = top[0]->cpu_diff();
    const bool in_place = (bottom[0] == top[0]);
    const Dtype* bottom_data = (in_place ? &temp_ : bottom[0])->cpu_data();
    // Hack: store big eltwise product in bottom[0] diff, except in the special
    // case where this layer itself does the eltwise product, in which case we
    // can store it directly in the scale diff, and we're done.
    // If we're computing in-place (and not doing eltwise computation), this
    // hack doesn't work and we store the product in temp_.
    const bool is_eltwise = (bottom[0]->count() == scale->count());
    Dtype* product = (is_eltwise ? scale->mutable_cpu_diff() :
        (in_place ? temp_.mutable_cpu_data() : bottom[0]->mutable_cpu_diff()));
    caffe_mul(top[0]->count(), top_diff, bottom_data, product);
    if (!is_eltwise) {
      Dtype* sum_result = NULL;
      if (inner_dim_ == 1) {
        sum_result = product;
      } else if (sum_result_.count() == 1) {
        const Dtype* sum_mult = sum_multiplier_.cpu_data();
        Dtype* scale_diff = scale->mutable_cpu_diff();
        if (scale_param) {
          Dtype result = caffe_cpu_dot(inner_dim_, product, sum_mult);
          *scale_diff += result;
        } else {
          *scale_diff = caffe_cpu_dot(inner_dim_, product, sum_mult);
        }
      } else {
        const Dtype* sum_mult = sum_multiplier_.cpu_data();
        sum_result = (outer_dim_ == 1) ?
            scale->mutable_cpu_diff() : sum_result_.mutable_cpu_data();
        caffe_cpu_gemv(CblasNoTrans, sum_result_.count(), inner_dim_,
                       Dtype(1), product, sum_mult, Dtype(0), sum_result);
      }
      if (outer_dim_ != 1) {
        const Dtype* sum_mult = sum_multiplier_.cpu_data();
        Dtype* scale_diff = scale->mutable_cpu_diff();
        if (scale_dim_ == 1) {
          if (scale_param) {
            Dtype result = caffe_cpu_dot(outer_dim_, sum_mult, sum_result);
            *scale_diff += result;
          } else {
            *scale_diff = caffe_cpu_dot(outer_dim_, sum_mult, sum_result);
          }
        } else {
          caffe_cpu_gemv(CblasTrans, outer_dim_, scale_dim_,
                         Dtype(1), sum_result, sum_mult, Dtype(scale_param),
                         scale_diff);
        }
      }
    }
  }
  if (propagate_down[0]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    const Dtype* scale_data = scale->cpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    for (int n = 0; n < outer_dim_; ++n) {
      for (int d = 0; d < scale_dim_; ++d) {
        const Dtype factor = scale_data[d];
        caffe_cpu_scale(inner_dim_, factor, top_diff, bottom_diff);
        bottom_diff += inner_dim_;
        top_diff += inner_dim_;
      }
    }
  }
}
Ejemplo n.º 17
0
void BatchNormLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  int num = bottom[0]->shape(0);
  int spatial_dim = bottom[0]->count()/(bottom[0]->shape(0)*channels_);

  if (bottom[0] != top[0]) {
    caffe_copy(bottom[0]->count(), bottom_data, top_data);
  }

  if (use_global_stats_) {
    // use the stored mean/variance estimates.
    const Dtype scale_factor = this->blobs_[2]->cpu_data()[0] == 0 ?
        0 : 1 / this->blobs_[2]->cpu_data()[0];
    caffe_cpu_scale(variance_.count(), scale_factor,
        this->blobs_[0]->cpu_data(), mean_.mutable_cpu_data());
    caffe_cpu_scale(variance_.count(), scale_factor,
        this->blobs_[1]->cpu_data(), variance_.mutable_cpu_data());
  } else {
    // compute mean
    caffe_cpu_gemv<Dtype>(CblasNoTrans, channels_ * num, spatial_dim,
        1. / (num * spatial_dim), bottom_data,
        spatial_sum_multiplier_.cpu_data(), 0.,
        num_by_chans_.mutable_cpu_data());
    caffe_cpu_gemv<Dtype>(CblasTrans, num, channels_, 1.,
        num_by_chans_.cpu_data(), batch_sum_multiplier_.cpu_data(), 0.,
        mean_.mutable_cpu_data());
  }

  // subtract mean
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, channels_, 1, 1,
      batch_sum_multiplier_.cpu_data(), mean_.cpu_data(), 0.,
      num_by_chans_.mutable_cpu_data());
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, channels_ * num,
      spatial_dim, 1, -1, num_by_chans_.cpu_data(),
      spatial_sum_multiplier_.cpu_data(), 1., top_data);

  if (!use_global_stats_) {
    // compute variance using var(X) = E((X-EX)^2)
    caffe_powx(top[0]->count(), top_data, Dtype(2),
        temp_.mutable_cpu_data());  // (X-EX)^2
    caffe_cpu_gemv<Dtype>(CblasNoTrans, channels_ * num, spatial_dim,
        1. / (num * spatial_dim), temp_.cpu_data(),
        spatial_sum_multiplier_.cpu_data(), 0.,
        num_by_chans_.mutable_cpu_data());
    caffe_cpu_gemv<Dtype>(CblasTrans, num, channels_, 1.,
        num_by_chans_.cpu_data(), batch_sum_multiplier_.cpu_data(), 0.,
        variance_.mutable_cpu_data());  // E((X_EX)^2)

    // compute and save moving average
    this->blobs_[2]->mutable_cpu_data()[0] *= moving_average_fraction_;
    this->blobs_[2]->mutable_cpu_data()[0] += 1;
    caffe_cpu_axpby(mean_.count(), Dtype(1), mean_.cpu_data(),
        moving_average_fraction_, this->blobs_[0]->mutable_cpu_data());
    int m = bottom[0]->count()/channels_;
    Dtype bias_correction_factor = m > 1 ? Dtype(m)/(m-1) : 1;
    caffe_cpu_axpby(variance_.count(), bias_correction_factor,
        variance_.cpu_data(), moving_average_fraction_,
        this->blobs_[1]->mutable_cpu_data());
  }

  // normalize variance
  caffe_add_scalar(variance_.count(), eps_, variance_.mutable_cpu_data());
  caffe_powx(variance_.count(), variance_.cpu_data(), Dtype(0.5),
             variance_.mutable_cpu_data());

  // replicate variance to input size
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, channels_, 1, 1,
      batch_sum_multiplier_.cpu_data(), variance_.cpu_data(), 0.,
      num_by_chans_.mutable_cpu_data());
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, channels_ * num,
      spatial_dim, 1, 1., num_by_chans_.cpu_data(),
      spatial_sum_multiplier_.cpu_data(), 0., temp_.mutable_cpu_data());
  caffe_div(temp_.count(), top_data, temp_.cpu_data(), top_data);
  // TODO(cdoersch): The caching is only needed because later in-place layers
  //                 might clobber the data.  Can we skip this if they won't?
  caffe_copy(x_norm_.count(), top_data,
      x_norm_.mutable_cpu_data());
}
Ejemplo n.º 18
0
void EltwiseLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  const int* mask = NULL;
  const int count = top[0]->count();
  const Dtype* top_data = top[0]->cpu_data();
  const Dtype* top_diff = top[0]->cpu_diff();
  if (broadcast_) {
    bool broadcasted[2];
    int i, j;
    broadcasted[0] = broadcasted[1] = false;
    for (int i=0; i<4; i++) {
      if (bottom[0]->shape()[i] > bottom[1]->shape()[i]) broadcasted[1] = true;
      if (bottom[0]->shape()[i] < bottom[1]->shape()[i]) broadcasted[0] = true;
    }

    i=0; j=1; //i -> not broadcasted  j-> broadcasted
    if (broadcasted[0]){ i=1; j=0; }

    int dima[4], dimb[4];
    const Dtype* bot_data = bottom[i]->cpu_data();
    const Dtype* bot_data_brd = bottom[j]->cpu_data();
    Dtype*       bot_diff = bottom[i]->mutable_cpu_diff();
    Dtype*       bot_diff_brd = bottom[j]->mutable_cpu_diff();

    for (int n=0; n<4; n++) dima[n] = bottom[i]->shape()[n];
    for (int n=0; n<4; n++) dimb[n] = bottom[j]->shape()[n];

    switch(op_)
    {
    case EltwiseParameter_EltwiseOp_PROD:
      if (propagate_down[j]) {
        int n=0;
        for (int x=0; x<4; x++) n *= dima[x];
        caffe_mul<Dtype>(n, top_diff, bot_data, bot_diff);
        caffe_sum_reduce<Dtype>(dima, dimb, bot_diff, bot_diff_brd);
        caffe_set(n, Dtype(0), bot_diff);
      }
      
      if (propagate_down[i])
        caffe_mul_broadcast<Dtype>(dima, dimb, top_diff, bot_data_brd, bot_diff);

      break;
    case EltwiseParameter_EltwiseOp_SUM:
      if (propagate_down[j]) 
        caffe_sum_reduce<Dtype>(dima, dimb, top_diff, bot_diff_brd);

      if (propagate_down[i]) {
        int n=0;
        for (int x=0; x<4; x++) n *= dima[x];
        caffe_copy<Dtype>(n, top_diff, bot_diff);
      }
      break;
    default:
      LOG(FATAL) << "Unknown elementwise operation.";
    }
  } else {
    for (int i = 0; i < bottom.size(); ++i) {
      if (propagate_down[i]) {
        const Dtype* bottom_data = bottom[i]->cpu_data();
        Dtype* bottom_diff = bottom[i]->mutable_cpu_diff();
        switch (op_) {
        case EltwiseParameter_EltwiseOp_PROD:
          if (stable_prod_grad_) {
            bool initialized = false;
            for (int j = 0; j < bottom.size(); ++j) {
              if (i == j) { continue; }
              if (!initialized) {
                caffe_copy(count, bottom[j]->cpu_data(), bottom_diff);
                initialized = true;
              } else {
                caffe_mul(count, bottom[j]->cpu_data(), bottom_diff,
                          bottom_diff);
              }
            }
          } else {
            caffe_div(count, top_data, bottom_data, bottom_diff);
          }
          caffe_mul(count, bottom_diff, top_diff, bottom_diff);
          break;
        case EltwiseParameter_EltwiseOp_SUM:
          if (coeffs_[i] == Dtype(1)) {
            caffe_copy(count, top_diff, bottom_diff);
          } else {
            caffe_cpu_scale(count, coeffs_[i], top_diff, bottom_diff);
          }
          break;
        case EltwiseParameter_EltwiseOp_MAX:
          mask = max_idx_.cpu_data();
          for (int index = 0; index < count; ++index) {
            Dtype gradient = 0;
            if (mask[index] == i) {
              gradient += top_diff[index];
            }
            bottom_diff[index] = gradient;
          }
          break;
        default:
          LOG(FATAL) << "Unknown elementwise operation.";
        }
      }
    }
  }
}
Ejemplo n.º 19
0
void AdaDeltaSolver<Dtype>::ComputeUpdateValue(int param_id, Dtype rate) {
  const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
  const vector<float>& net_params_lr = this->net_->params_lr();
  Dtype delta = this->param_.delta();
  Dtype momentum = this->param_.momentum();
  Dtype local_rate = rate * net_params_lr[param_id];
  size_t update_history_offset = net_params.size();
  switch (Caffe::mode()) {
  case Caffe::CPU: {
    // compute square of gradient in update
    caffe_powx(net_params[param_id]->count(),
        net_params[param_id]->cpu_diff(), Dtype(2),
        this->update_[param_id]->mutable_cpu_data());

    // update history of gradients
    caffe_cpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum,
        this->update_[param_id]->cpu_data(), momentum,
        this->history_[param_id]->mutable_cpu_data());

    // add delta to history to guard against dividing by zero later
    caffe_set(net_params[param_id]->count(), delta,
        this->temp_[param_id]->mutable_cpu_data());

    caffe_add(net_params[param_id]->count(),
        this->temp_[param_id]->cpu_data(),
        this->history_[update_history_offset + param_id]->cpu_data(),
        this->update_[param_id]->mutable_cpu_data());

    caffe_add(net_params[param_id]->count(),
        this->temp_[param_id]->cpu_data(),
        this->history_[param_id]->cpu_data(),
        this->temp_[param_id]->mutable_cpu_data());

    // divide history of updates by history of gradients
    caffe_div(net_params[param_id]->count(),
        this->update_[param_id]->cpu_data(),
        this->temp_[param_id]->cpu_data(),
        this->update_[param_id]->mutable_cpu_data());

    // jointly compute the RMS of both for update and gradient history
    caffe_powx(net_params[param_id]->count(),
        this->update_[param_id]->cpu_data(), Dtype(0.5),
        this->update_[param_id]->mutable_cpu_data());

    // compute the update
    caffe_mul(net_params[param_id]->count(),
        net_params[param_id]->cpu_diff(),
        this->update_[param_id]->cpu_data(),
        net_params[param_id]->mutable_cpu_diff());

    // compute square of update
    caffe_powx(net_params[param_id]->count(),
        net_params[param_id]->cpu_diff(), Dtype(2),
        this->update_[param_id]->mutable_cpu_data());

    // update history of updates
    caffe_cpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum,
        this->update_[param_id]->cpu_data(), momentum,
        this->history_[update_history_offset + param_id]->mutable_cpu_data());

    // apply learning rate
    caffe_cpu_scale(net_params[param_id]->count(), local_rate,
        net_params[param_id]->cpu_diff(),
        net_params[param_id]->mutable_cpu_diff());
    break;
  }
  case Caffe::GPU: {
#ifndef CPU_ONLY
    adadelta_update_gpu(net_params[param_id]->count(),
        net_params[param_id]->mutable_gpu_diff(),
        this->history_[param_id]->mutable_gpu_data(),
        this->history_[update_history_offset + param_id]->mutable_gpu_data(),
        momentum, delta, local_rate);
#else
    NO_GPU;
#endif
    break;
  }
  default:
    LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
  }
}
Ejemplo n.º 20
0
TYPED_TEST(BlobMathTest, TestScaleData) {
  typedef typename TypeParam::Dtype Dtype;

  EXPECT_EQ(0, this->blob_->asum_data());
  EXPECT_EQ(0, this->blob_->asum_diff());
  FillerParameter filler_param;
  filler_param.set_min(-3);
  filler_param.set_max(3);
  UniformFiller<Dtype> filler(filler_param);
  filler.Fill(this->blob_);
  const Dtype asum_before_scale = this->blob_->asum_data();
  // Do a mutable access on the current device,
  // so that the asum computation is done on that device.
  // (Otherwise, this would only check the CPU asum implementation.)
  switch (TypeParam::device) {
  case Engine::CPU:
    this->blob_->mutable_cpu_data();
    break;
  case Engine::GPU:
    this->blob_->mutable_gpu_data();
    break;
  default:
    LOG(FATAL) << "Unknown device: " << TypeParam::device;
  }
  const Dtype kDataScaleFactor = 3;
  this->blob_->scale_data(kDataScaleFactor);
  EXPECT_NEAR(asum_before_scale * kDataScaleFactor, this->blob_->asum_data(),
              this->epsilon_ * asum_before_scale * kDataScaleFactor);
  EXPECT_EQ(0, this->blob_->asum_diff());

  // Check scale_diff too.
  const Dtype kDataToDiffScaleFactor = 7;
  const Dtype* data = this->blob_->cpu_data();
  caffe_cpu_scale(this->blob_->count(), kDataToDiffScaleFactor, data,
                  this->blob_->mutable_cpu_diff());
  const Dtype expected_asum_before_scale = asum_before_scale * kDataScaleFactor;
  EXPECT_NEAR(expected_asum_before_scale, this->blob_->asum_data(),
              this->epsilon_ * expected_asum_before_scale);
  const Dtype expected_diff_asum_before_scale =
      asum_before_scale * kDataScaleFactor * kDataToDiffScaleFactor;
  EXPECT_NEAR(expected_diff_asum_before_scale, this->blob_->asum_diff(),
              this->epsilon_ * expected_diff_asum_before_scale);
  switch (TypeParam::device) {
  case Engine::CPU:
    this->blob_->mutable_cpu_diff();
    break;
  case Engine::GPU:
    this->blob_->mutable_gpu_diff();
    break;
  default:
    LOG(FATAL) << "Unknown device: " << TypeParam::device;
  }
  const Dtype kDiffScaleFactor = 3;
  this->blob_->scale_diff(kDiffScaleFactor);
  EXPECT_NEAR(asum_before_scale * kDataScaleFactor, this->blob_->asum_data(),
              this->epsilon_ * asum_before_scale * kDataScaleFactor);
  const Dtype expected_diff_asum =
      expected_diff_asum_before_scale * kDiffScaleFactor;
  EXPECT_NEAR(expected_diff_asum, this->blob_->asum_diff(),
              this->epsilon_ * expected_diff_asum);
}
Ejemplo n.º 21
0
void CoupledClusterLossLayer<Dtype>::Forward_cpu(
  const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
    pos_ids = std::vector<std::vector<int> >(group_num, std::vector<int>());
    neg_ids = std::vector<std::vector<int> >(group_num, std::vector<int>());
    pos_backward = std::vector<bool>(group_num*N, false);
    neg_backward = std::vector<bool>(group_num*N, false);
    const Dtype *feat_ptr = bottom[0]->cpu_data();
    const Dtype *label_ptr = bottom[1]->cpu_data();
    Dtype *diff_ptr_ = diff_.mutable_cpu_data();
    Dtype loss(0);

    caffe_set(feat_len*group_num, Dtype(0), pos_center_.mutable_cpu_data());

    int cnt = 0;
    /* i -> group index */
    for(int i=0; i<group_num; ++i) {
        /* search for the positive id */
        std::set<Dtype> labels;
        Dtype anchor_id = -1;
        for(int j=0; j<N; ++j) {
            Dtype tmp = label_ptr[N*i+j];
            if(labels.count(tmp)>0) {
                anchor_id = tmp;
                break;
            }
            else
                labels.insert(tmp);
        }
        // CHECK_NE(anchor_id, -1);
        /* collect for positive and negative ids, compute the center of positive samples */
        for(int j=0; j<N; ++j) {
            if(label_ptr[i*N+j]==anchor_id){
                pos_ids[i].push_back(j);
                caffe_add(feat_len, feat_ptr+feat_len*(i*N+j), pos_center_.mutable_cpu_data()+feat_len*i, pos_center_.mutable_cpu_data()+feat_len*i);
            }
            else neg_ids[i].push_back(j);
        }
        caffe_cpu_scale(feat_len, Dtype(1)/pos_ids[i].size(), pos_center_.mutable_cpu_data()+feat_len*i, pos_center_.mutable_cpu_data()+feat_len*i);

        if(neg_ids[i].size()==0 || pos_ids[i].size()<=1) continue;

        Dtype pos_mdist = Dtype(0);
        Dtype neg_min_val = -1;
        Dtype pos_max_val = -1;
        for(int j=0; j<N; ++j) {
            // f[j]-center
            caffe_sub(feat_len, feat_ptr+feat_len*(i*N+j), pos_center_.cpu_data()+feat_len*i, diff_ptr_+feat_len*(i*N+j));
            if(scale!=1)
                caffe_cpu_scale(feat_len, scale, diff_ptr_+feat_len*(i*N+j), diff_ptr_+feat_len*(i*N+j));
            Dtype d = caffe_cpu_dot(feat_len, diff_ptr_+feat_len*(i*N+j), diff_ptr_+feat_len*(i*N+j));
            if(log_flag)
                LOG(INFO) << "i " << i << ", j " << j << ", d " << d;
            dist_sq_.mutable_cpu_data()[i*N+j] = d;
            if(std::count(neg_ids[i].begin(), neg_ids[i].end(), j)>0 && (neg_min_val==-1 || d<neg_min_val)) neg_min_val = d;
            else if(std::count(neg_ids[i].begin(), neg_ids[i].end(), j)==0 && (pos_max_val==-1 || d>pos_max_val)) pos_max_val = d;
        }
        for(int j=0; j<N; ++j) {
            if(std::count(neg_ids[i].begin(), neg_ids[i].end(), j)>0) {
                Dtype d = dist_sq_.cpu_data()[i*N+j];
                Dtype mdist = std::max(-d+margin+pos_max_val, Dtype(0));
                if(log_flag)
                    LOG(INFO) << "j=" << j << ", d=" << d << ", pos_max_val=" << pos_max_val << ", mdist=" << mdist;
                if(mdist>0) neg_backward[i*N+j] = true;
            }
            else {
                Dtype d = dist_sq_.cpu_data()[i*N+j];
                Dtype mdist = std::max(d+margin-neg_min_val, Dtype(0));
                if(log_flag)
                    LOG(INFO) << "j=" << j << ", d=" << d << ", neg_min_val=" << neg_min_val << ", mdist=" << mdist;
                if(mdist>0) pos_backward[i*N+j] = true;
                pos_mdist += mdist;
            }
        }
        /* average punishment */
        pos_mdist /= pos_ids[i].size();
        // pos_mdist *= 2;

        if(log_flag)
            LOG(INFO) << "pos_mdist " << pos_mdist << ", neg_min_val " << neg_min_val;

        CHECK_GE(pos_ids[i].size(), 2);
        CHECK_GE(neg_ids[i].size(), 1);

        loss += pos_mdist;
        ++cnt;
    }
    loss = loss / cnt;
    top[0]->mutable_cpu_data()[0] = loss;
}