void CosineSimilarityLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {


  for (int i = 0; i < bottom[0]->num(); ++i) {
    summer_vec_.mutable_cpu_data()[i] = Dtype(1);
  }

  int channels = bottom[0]->channels();
  for (int i = 0; i < bottom[0]->num(); ++i) {
    xx_.mutable_cpu_data()[i] = caffe_cpu_dot(channels, bottom[0]->cpu_data() + i * channels,
        bottom[0]->cpu_data() + i * channels);
    yy_.mutable_cpu_data()[i] = caffe_cpu_dot(channels, bottom[1]->cpu_data() + i * channels,
        bottom[1]->cpu_data() + i * channels);
    xy_.mutable_cpu_data()[i] = caffe_cpu_dot(channels, bottom[0]->cpu_data() + i * channels,
        bottom[1]->cpu_data() + i * channels);
  }
  caffe_mul(bottom[1]->num(), xx_.cpu_data(), yy_.cpu_data(), summer_vec_.mutable_cpu_data());

  for (int i = 0; i < bottom[0]->num(); ++i) {
    summer_vec_.mutable_cpu_data()[i] = sqrt(summer_vec_.cpu_data()[i]);
  } 
  caffe_div(bottom[1]->num(), xy_.cpu_data(), summer_vec_.cpu_data(), top[0]->mutable_cpu_data());
}
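
For reference, a minimal standalone sketch of the per-sample quantity the layer above computes, cos(x, y) = <x, y> / sqrt(<x, x> * <y, y>) (the xy_ / sqrt(xx_ * yy_) ratio), written with plain loops instead of the caffe_* BLAS wrappers; the helper names are illustrative only:

#include <cmath>
#include <vector>

// Illustrative helper: dot product of two equal-length vectors.
double dot(const std::vector<double>& a, const std::vector<double>& b) {
  double s = 0.0;
  for (size_t i = 0; i < a.size(); ++i) s += a[i] * b[i];
  return s;
}

// cos(x, y) = <x, y> / sqrt(<x, x> * <y, y>), matching xy_ / sqrt(xx_ * yy_) above.
double cosine_similarity(const std::vector<double>& x, const std::vector<double>& y) {
  return dot(x, y) / std::sqrt(dot(x, x) * dot(y, y));
}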
Example 2
	void DeconvNormLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
		const vector<Blob<Dtype>*>& top)
	{
		Dtype* wa = weights_alphas->mutable_cpu_data();

		exp_layer->Forward(exp_bottom_vec, exp_top_vec);
		for (int ch_in = 0; ch_in < weights_alphas->num(); ++ch_in)
		{
			caffe_mul(alphas->count(), this->blobs_[0]->cpu_data() + this->blobs_[0]->offset(ch_in),
				alphas->cpu_data(), wa + weights_alphas->offset(ch_in));
		}

		deconv2_layer->Forward(bottom, deconv2_top_vec);
		deconv1_layer->Forward(deconv1_bottom_vec, deconv1_top_vec);

		Dtype* top_data = top[0]->mutable_cpu_data();
		const Dtype* deconv1_top_vec_data = deconv1_top_vec[0]->cpu_data();
		const Dtype* deconv2_top_vec_data = deconv2_top_vec[0]->cpu_data();
		caffe_add_scalar(deconv1_top_vec[0]->count(), (Dtype) std::numeric_limits<Dtype>::epsilon(), 
			deconv1_top_vec[0]->mutable_cpu_data());
		for (int n = 0; n < bottom[0]->num(); ++n)
		{
			caffe_div(deconv1_top_vec[0]->count(), deconv2_top_vec_data + deconv2_top_vec[0]->offset(n),
				deconv1_top_vec_data, top_data + top[0]->offset(n));

			if (this->bias_term_)
			{
				const Dtype* bias = this->blobs_[2]->cpu_data();
				caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, top[0]->channels(),
					top[0]->height() * top[0]->width(), 1, (Dtype)1., bias, bias_multiplier.cpu_data(),
					(Dtype)1., top_data + top[0]->offset(n));
			}
		}
	}
void EltwiseLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
  if (propagate_down) {
    const int count = top[0]->count();
    const Dtype* top_data = top[0]->cpu_data();
    const Dtype* top_diff = top[0]->cpu_diff();
    for (int i = 0; i < bottom->size(); ++i) {
      const Dtype* bottom_data = (*bottom)[i]->cpu_data();
      Dtype* bottom_diff = (*bottom)[i]->mutable_cpu_diff();
      switch (op_) {
      case EltwiseParameter_EltwiseOp_PROD:
        caffe_div(count, top_data, bottom_data, bottom_diff);
        caffe_mul(count, bottom_diff, top_diff, bottom_diff);
        break;
      case EltwiseParameter_EltwiseOp_SUM:
        if (coeffs_[i] == Dtype(1)) {
          caffe_copy(count, top_diff, bottom_diff);
        } else {
          caffe_cpu_scale(count, coeffs_[i], top_diff, bottom_diff);
        }
        break;
      default:
        LOG(FATAL) << "Unknown elementwise operation.";
      }
    }
  }
}
Example 4
void MVNLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
                                  vector<Blob<Dtype>*>* top) {
    const Dtype* bottom_data = bottom[0]->cpu_data();
    Dtype* top_data = (*top)[0]->mutable_cpu_data();
    int num;
    if (this->layer_param_.mvn_param().across_channels())
        num = bottom[0]->num();
    else
        num = bottom[0]->num() * bottom[0]->channels();

    int dim = bottom[0]->count() / num;
    Dtype eps = 1e-10;

    if (this->layer_param_.mvn_param().normalize_variance()) {
        // put the squares of bottom into temp_
        caffe_powx(bottom[0]->count(), bottom_data, Dtype(2),
                   temp_.mutable_cpu_data());

        // computes variance using var(X) = E(X^2) - (EX)^2
        caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, bottom_data,
                              sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());  // EX
        caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, temp_.cpu_data(),
                              sum_multiplier_.cpu_data(), 0.,
                              variance_.mutable_cpu_data());  // E(X^2)
        caffe_powx(mean_.count(), mean_.cpu_data(), Dtype(2),
                   temp_.mutable_cpu_data());  // (EX)^2
        caffe_sub(mean_.count(), variance_.cpu_data(), temp_.cpu_data(),
                  variance_.mutable_cpu_data());  // variance

        // do mean and variance normalization
        // subtract mean
        caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, -1.,
                              mean_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
                              temp_.mutable_cpu_data());

        caffe_add(temp_.count(), bottom_data, temp_.cpu_data(), top_data);

        // normalize variance
        caffe_powx(variance_.count(), variance_.cpu_data(), Dtype(0.5),
                   variance_.mutable_cpu_data());

        caffe_add_scalar(variance_.count(), eps, variance_.mutable_cpu_data());

        caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
                              variance_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
                              temp_.mutable_cpu_data());

        caffe_div(temp_.count(), top_data, temp_.cpu_data(), top_data);
    } else {
        caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, bottom_data,
                              sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());  // EX

        // subtract mean
        caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, -1.,
                              mean_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
                              temp_.mutable_cpu_data());

        caffe_add(temp_.count(), bottom_data, temp_.cpu_data(), top_data);
    }
}
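
A scalar sketch of the identity the comment above relies on, var(X) = E(X^2) - (EX)^2; plain C++ for illustration, not part of the layer:

#include <vector>

// Scalar check of var(X) = E(X^2) - (EX)^2 for a single vector.
double variance(const std::vector<double>& x) {
  double mean = 0.0, mean_sq = 0.0;
  for (double v : x) { mean += v; mean_sq += v * v; }
  mean /= x.size();     // EX
  mean_sq /= x.size();  // E(X^2)
  return mean_sq - mean * mean;
}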
Example 5
void SoftmaxLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  Dtype* scale_data = scale_.mutable_cpu_data();
  int channels = bottom[0]->shape(softmax_axis_);
  int dim = bottom[0]->count() / outer_num_;
  caffe_copy(bottom[0]->count(), bottom_data, top_data);
  // We need to subtract the max to avoid numerical issues, compute the exp,
  // and then normalize.
  for (int i = 0; i < outer_num_; ++i) {
    // initialize scale_data to the first plane
    caffe_copy(inner_num_, bottom_data + i * dim, scale_data);
    for (int j = 0; j < channels; j++) {
      for (int k = 0; k < inner_num_; k++) {
        scale_data[k] = std::max(scale_data[k],
            bottom_data[i * dim + j * inner_num_ + k]);
      }
    }
    // subtraction
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, channels, inner_num_,
        1, -1., sum_multiplier_.cpu_data(), scale_data, 1., top_data);
    // exponentiation
    caffe_exp<Dtype>(dim, top_data, top_data);
    // sum after exp
    caffe_cpu_gemv<Dtype>(CblasTrans, channels, inner_num_, 1.,
        top_data, sum_multiplier_.cpu_data(), 0., scale_data);
    // division
    for (int j = 0; j < channels; j++) {
      caffe_div(inner_num_, top_data, scale_data, top_data);
      top_data += inner_num_;
    }
  }
}
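
The same max-subtract / exp / normalize sequence, as a self-contained sketch for a single vector (illustrative helper, not Caffe API):

#include <algorithm>
#include <cmath>
#include <vector>

// Numerically stable softmax: subtract the max, exponentiate, divide by the sum,
// mirroring the gemm / exp / gemv / div sequence in the layer above.
std::vector<double> softmax(std::vector<double> v) {
  const double m = *std::max_element(v.begin(), v.end());
  double sum = 0.0;
  for (double& x : v) { x = std::exp(x - m); sum += x; }
  for (double& x : v) { x /= sum; }
  return v;
}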
Example 6
Dtype SoftmaxLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    vector<Blob<Dtype>*>* top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = (*top)[0]->mutable_cpu_data();
  Dtype* scale_data = scale_.mutable_cpu_data();
  size_t num = bottom[0]->num();
  size_t channels = bottom[0]->channels();
  size_t dim = bottom[0]->count() / outer_num_;
  //size_t dim = bottom[0]->count() / bottom[0]->num();
  caffe_copy(bottom[0]->count(), bottom_data, top_data);

  for (size_t i = 0; i < outer_num_; ++i) {
    // initialize scale_data to the first plane
    caffe_copy(inner_num_, bottom_data + i * dim, scale_data);
    for (size_t j = 0; j < channels; j++) {
      for (size_t k = 0; k < inner_num_; k++) {
        scale_data[k] = std::max(scale_data[k],
            bottom_data[i * dim + j * inner_num_ + k]);
      }
    }
    // subtraction
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, channels, inner_num_,
        1, -1., sum_multiplier_.cpu_data(), scale_data, 1., top_data);
    // exponentiation
    caffe_exp<Dtype>(dim, top_data, top_data);
    // sum after exp
    caffe_cpu_gemv<Dtype>(CblasTrans, channels, inner_num_, 1.,
        top_data, sum_multiplier_.cpu_data(), 0., scale_data);
    // division
    for (size_t j = 0; j < channels; j++) {
      caffe_div(inner_num_, top_data, scale_data, top_data);
      top_data += inner_num_;
    }
  }



  // we need to subtract the max to avoid numerical issues, compute the exp,
  // and then normalize.
  // for (int i = 0; i < num; ++i) {
    // scale_data[i] = bottom_data[i*dim];
    // for (size_t j = 0; j < dim; ++j) {
      // scale_data[i] = max(scale_data[i], bottom_data[i * dim + j]);
    // }
  // }
  // // subtraction
  // caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, -1.,
    // scale_data, sum_multiplier_.cpu_data(), 1., top_data);
  // // Perform exponentiation
  // caffe_exp<Dtype>(num * dim, top_data, top_data);
  // // sum after exp
  // caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., top_data,
      // sum_multiplier_.cpu_data(), 0., scale_data);
  // // Do division
  // for (int i = 0; i < num; ++i) {
    // caffe_scal<Dtype>(dim, Dtype(1.) / scale_data[i], top_data + i * dim);
  // }
  return Dtype(0);
}
Example 7
void PowerLayer<Dtype>::Backward_cpu(
    const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[0]) {
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    const int count = bottom[0]->count();
    const Dtype* top_diff = top[0]->cpu_diff();
    if (diff_scale_ == Dtype(0) || power_ == Dtype(1)) {
      caffe_set(count, diff_scale_, bottom_diff);
    } else {
      const Dtype* bottom_data = bottom[0]->cpu_data();
      // Compute dy/dx = scale * power * (shift + scale * x)^(power - 1)
      //               = diff_scale * y / (shift + scale * x)
      if (power_ == Dtype(2)) {
        // Special case for y = (shift + scale * x)^2
        //     -> dy/dx = 2 * scale * (shift + scale * x)
        //              = diff_scale * shift + diff_scale * scale * x
        caffe_cpu_axpby(
            count,
            diff_scale_ * scale_,
            bottom_data,
            Dtype(0),
            bottom_diff);

        if (shift_ != Dtype(0)) {
          caffe_add_scalar(count, diff_scale_ * shift_, bottom_diff);
        }
      } else if (shift_ == Dtype(0)) {
        // Special case for y = (scale * x)^power
        //     -> dy/dx = scale * power * (scale * x)^(power - 1)
        //              = scale * power * (scale * x)^power * (scale * x)^(-1)
        //              = power * y / x
        const Dtype* top_data = top[0]->cpu_data();
        caffe_div(count, top_data, bottom_data, bottom_diff);
        caffe_scal(count, power_, bottom_diff);
      } else {
        caffe_copy(count, bottom_data, bottom_diff);
        if (scale_ != Dtype(1)) {
          caffe_scal(count, scale_, bottom_diff);
        }
        if (shift_ != Dtype(0)) {
          caffe_add_scalar(count, shift_, bottom_diff);
        }
        const Dtype* top_data = top[0]->cpu_data();
        caffe_div<Dtype>(count, top_data, bottom_diff, bottom_diff);
        if (diff_scale_ != Dtype(1)) {
          caffe_scal(count, diff_scale_, bottom_diff);
        }
      }
    }
    if (diff_scale_ != Dtype(0)) {
      caffe_mul(count, top_diff, bottom_diff, bottom_diff);
    }
  }
}
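
A scalar sketch of the identity the comments above use, dy/dx = scale * power * (shift + scale * x)^(power - 1) = diff_scale * y / (shift + scale * x) with diff_scale = scale * power; the function names are illustrative only:

#include <cmath>

// y = (shift + scale * x)^power
double power_forward(double x, double scale, double shift, double power) {
  return std::pow(shift + scale * x, power);
}

// dy/dx = diff_scale * y / (shift + scale * x), diff_scale = scale * power.
double power_backward(double x, double scale, double shift, double power) {
  const double diff_scale = scale * power;
  return diff_scale * power_forward(x, scale, shift, power) / (shift + scale * x);
}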
Example 8
	void ConvNormLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
		const vector<Blob<Dtype>*>& top)
	{
		conv_layer->Forward(bottom, conv_top_vec);
		for (int n = 0; n < conv_top_vec[0]->num(); n++)
		{
			caffe_div(norm_top.count(), conv_top_vec[0]->cpu_data() + conv_top_vec[0]->offset(n),
				norm_top.cpu_data(), top[0]->mutable_cpu_data() + top[0]->offset(n));
		}
	}
Example 9
	void ConvNormLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
		const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom)
	{
		caffe_set(conv_top_vec[0]->count(), (Dtype)0, conv_top_vec[0]->mutable_cpu_diff());
		for (int n = 0; n < conv_top_vec[0]->num(); n++)
		{
			caffe_div(norm_top.count(), top[0]->cpu_diff() + top[0]->offset(n),
				norm_top.cpu_data(), conv_top_vec[0]->mutable_cpu_diff()+conv_top_vec[0]->offset(n));
		}

		conv_layer->Backward(conv_top_vec, propagate_down, bottom);
	}
Example 10
void AbsValLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  const int count = top[0]->count();
  const Dtype* top_data = top[0]->cpu_data();
  const Dtype* top_diff = top[0]->cpu_diff();
  if (propagate_down[0]) {
    const Dtype* bottom_data = bottom[0]->cpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    caffe_div(count, top_data, bottom_data, bottom_diff);
    caffe_mul(count, bottom_diff, top_diff, bottom_diff);
  }
}
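
What the div/mul pair above amounts to for a single element: d|x|/dx = |x| / x = sign(x) (undefined at x == 0, where the division produces an inf/nan), scaled by the incoming gradient. Illustrative sketch only:

#include <cmath>

// bottom_diff = top_diff * (|x| / x), i.e. top_diff * sign(x) for x != 0.
double absval_backward(double x, double top_diff) {
  return top_diff * (std::abs(x) / x);
}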
Example 11
void EltwiseLayer<Dtype, MItype, MOtype>::Backward_cpu(const vector<Blob<MOtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<MItype>*>& bottom) {
  const int_tp* mask = NULL;
  const int_tp count = top[0]->count();
  const Dtype* top_data = top[0]->cpu_data();
  const Dtype* top_diff = top[0]->cpu_diff();
  for (int_tp i = 0; i < bottom.size(); ++i) {
    if (propagate_down[i]) {
      const Dtype* bottom_data = bottom[i]->cpu_data();
      Dtype* bottom_diff = bottom[i]->mutable_cpu_diff();
      switch (op_) {
      case EltwiseParameter_EltwiseOp_PROD:
        if (stable_prod_grad_) {
          bool initialized = false;
          for (int_tp j = 0; j < bottom.size(); ++j) {
            if (i == j) { continue; }
            if (!initialized) {
              caffe_copy(count, bottom[j]->cpu_data(), bottom_diff);
              initialized = true;
            } else {
              caffe_mul(count, bottom[j]->cpu_data(), bottom_diff,
                        bottom_diff);
            }
          }
        } else {
          caffe_div(count, top_data, bottom_data, bottom_diff);
        }
        caffe_mul(count, bottom_diff, top_diff, bottom_diff);
        break;
      case EltwiseParameter_EltwiseOp_SUM:
        if (coeffs_[i] == Dtype(1)) {
          caffe_copy(count, top_diff, bottom_diff);
        } else {
          caffe_scale(count, coeffs_[i], top_diff, bottom_diff);
        }
        break;
      case EltwiseParameter_EltwiseOp_MAX:
        mask = max_idx_.cpu_data();
        for (int_tp index = 0; index < count; ++index) {
          Dtype gradient = 0;
          if (mask[index] == i) {
            gradient += top_diff[index];
          }
          bottom_diff[index] = gradient;
        }
        break;
      default:
        LOG(FATAL) << "Unknown elementwise operation.";
      }
    }
  }
}
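
A sketch of why the stable_prod_grad_ branch exists: for z = x_0 * x_1 * ... * x_n, the gradient w.r.t. input i is the product of the other inputs, which the stable path builds directly; the else branch computes top_data / bottom_data, which breaks when an input contains zeros. Plain C++ for illustration:

#include <cstddef>
#include <vector>

// dz/dx_i for z = product of all inputs: multiply together every input except i.
double prod_grad(const std::vector<double>& inputs, size_t i) {
  double g = 1.0;
  for (size_t j = 0; j < inputs.size(); ++j) {
    if (j != i) g *= inputs[j];
  }
  return g;  // multiply by the top gradient to obtain the input gradient
}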
Example 12
void RMSPropSolver<Dtype>::ComputeUpdateValue(int param_id, Dtype rate) {
  const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
  const vector<float>& net_params_lr = this->net_->params_lr();

  // get the learning rate
  Dtype delta = this->param_.delta();
  Dtype rms_decay = this->param_.rms_decay();
  Dtype local_rate = rate * net_params_lr[param_id];

  switch (Caffe::mode()) {
  case Caffe::CPU:
    // compute square of gradient in update
    caffe_powx(net_params[param_id]->count(),
        net_params[param_id]->cpu_diff(), Dtype(2),
        this->update_[param_id]->mutable_cpu_data());

    // update history
    caffe_cpu_axpby(net_params[param_id]->count(),
        Dtype(1 - rms_decay), this->update_[param_id]->cpu_data(),
        rms_decay, this->history_[param_id]->mutable_cpu_data());

    // prepare update
    caffe_powx(net_params[param_id]->count(),
        this->history_[param_id]->cpu_data(), Dtype(0.5),
        this->update_[param_id]->mutable_cpu_data());

    caffe_add_scalar(net_params[param_id]->count(),
        delta, this->update_[param_id]->mutable_cpu_data());

    caffe_div(net_params[param_id]->count(),
        net_params[param_id]->cpu_diff(), this->update_[param_id]->cpu_data(),
        this->update_[param_id]->mutable_cpu_data());

    // scale and copy
    caffe_cpu_axpby(net_params[param_id]->count(), local_rate,
        this->update_[param_id]->cpu_data(), Dtype(0),
        net_params[param_id]->mutable_cpu_diff());
    break;
  case Caffe::GPU:
#ifndef CPU_ONLY
    rmsprop_update_gpu(net_params[param_id]->count(),
        net_params[param_id]->mutable_gpu_diff(),
        this->history_[param_id]->mutable_gpu_data(),
        rms_decay, delta, local_rate);
#else
    NO_GPU;
#endif
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
  }
}
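
The CPU branch above, reduced to its scalar form (illustrative helper, not Caffe API):

#include <cmath>

// RMSProp step per parameter:
//   history = rms_decay * history + (1 - rms_decay) * grad^2
//   update  = lr * grad / (sqrt(history) + delta)
double rmsprop_step(double grad, double& history,
                    double rms_decay, double delta, double lr) {
  history = rms_decay * history + (1.0 - rms_decay) * grad * grad;
  return lr * grad / (std::sqrt(history) + delta);
}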
Example 13
	void MaskingLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
		const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
		CHECK_GE(this->blobs_.size(), 1);

		// TODO: check gradient formulas (http://ufldl.stanford.edu/tutorial/supervised/MultiLayerNeuralNetworks/)

		if (stable_prod_grad_) {
			if (propagate_down[0]) {
				// Gradient with respect to bottom data
				caffe_mul(top[0]->count(), this->blobs_[0]->cpu_data(), top[0]->cpu_diff(), bottom[0]->mutable_cpu_diff()); // d_i = d_(i+1) .* w
			}

			// Gradient with respect to weights
			caffe_mul(top[0]->count(), bottom[0]->cpu_data(), top[0]->cpu_diff(), this->blobs_[0]->mutable_cpu_diff()); // d_i = d_(i+1) .* in

			// Gradient with respect to bias
			if (bias_term_) {
				// TODO: check whether there are any smart pointer tricks which can replace the copying overhead
				caffe_copy(top[0]->count(), top[0]->cpu_diff(), this->blobs_[1]->mutable_cpu_diff()); // d_i = d_(i+1)
			}
		} else {
			// less stable gradient computation method inspired by elementwise layer, this is just for comparison/debugging purposes

			if (propagate_down[0]) {
				// Gradient with respect to bottom data
				caffe_div(top[0]->count(), top[0]->cpu_data(), bottom[0]->cpu_data(), bottom[0]->mutable_cpu_diff());
				caffe_mul(top[0]->count(), bottom[0]->cpu_diff(), top[0]->cpu_diff(), bottom[0]->mutable_cpu_diff());
			}

			// Gradient with respect to weights
			caffe_div(top[0]->count(), top[0]->cpu_data(), this->blobs_[0]->cpu_data(), this->blobs_[0]->mutable_cpu_diff());
			caffe_mul(top[0]->count(), this->blobs_[0]->cpu_diff(), top[0]->cpu_diff(), this->blobs_[0]->mutable_cpu_diff());

			// Gradient with respect to bias
			if (bias_term_) {
				caffe_copy(top[0]->count(), top[0]->cpu_diff(), this->blobs_[1]->mutable_cpu_diff()); // d_i = d_(i+1)
			}
		}
	}
Example 14
void MVNLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  const Dtype* top_diff = top[0]->cpu_diff();
  const Dtype* top_data = top[0]->cpu_data();
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();

  int num;
  if (this->layer_param_.mvn_param().across_channels())
    num = bottom[0]->num();
  else
    num = bottom[0]->num() * bottom[0]->channels();

  int dim = bottom[0]->count() / num;

  if (this->layer_param_.mvn_param().normalize_variance()) {
    caffe_mul(temp_.count(), top_data, top_diff, bottom_diff);
    caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., bottom_diff,
          sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
          mean_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
          bottom_diff);
    caffe_mul(temp_.count(), top_data, bottom_diff, bottom_diff);

    caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., top_diff,
            sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
            mean_.cpu_data(), sum_multiplier_.cpu_data(), 1.,
            bottom_diff);

    caffe_cpu_axpby(temp_.count(), Dtype(1), top_diff, Dtype(-1. / dim),
        bottom_diff);

    // put the squares of bottom into temp_
    caffe_powx(temp_.count(), bottom_data, Dtype(2),
        temp_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
        variance_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
        temp_.mutable_cpu_data());

    caffe_div(temp_.count(), bottom_diff, temp_.cpu_data(), bottom_diff);
  } else {
    caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, top_diff,
      sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, -1.,
      mean_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
      temp_.mutable_cpu_data());
    caffe_add(temp_.count(), top_diff, temp_.cpu_data(), bottom_diff);
  }
}
Example 15
void EltwiseProductLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
  if (propagate_down) {
    const int count = top[0]->count();
    const Dtype* top_data = top[0]->cpu_data();
    const Dtype* top_diff = top[0]->cpu_diff();
    for (int i = 0; i < bottom->size(); ++i) {
      const Dtype* bottom_data = (*bottom)[i]->cpu_data();
      Dtype* bottom_diff = (*bottom)[i]->mutable_cpu_diff();
      caffe_div(count, top_data, bottom_data, bottom_diff);
      caffe_mul(count, bottom_diff, top_diff, bottom_diff);
    }
  }
}
void AbsValLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const bool propagate_down,
    vector<Blob<Dtype>*>* bottom) {
  if (propagate_down) {  
    //const int count = top[0]->count();
    const int count = (*bottom)[0]->count();    
    const Dtype* top_data = top[0]->cpu_data();
    const Dtype* top_diff = top[0]->cpu_diff();
    const Dtype* bottom_data = (*bottom)[0]->cpu_data();
    Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
    
    caffe_div(count, top_data, bottom_data, bottom_diff);
    caffe_mul(count, bottom_diff, top_diff, bottom_diff);
  }
}
Example 17
void MVNLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  int num;
  if (this->layer_param_.mvn_param().across_channels())
    num = bottom[0]->num();
  else
    num = bottom[0]->num() * bottom[0]->channels();

  int dim = bottom[0]->count() / num;

  // subtract mean
  caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, bottom_data,
      sum_multiplier_.cpu_data(), 0., mean_.mutable_cpu_data());  // EX
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, -1.,
      mean_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
      temp_.mutable_cpu_data());
  caffe_add(temp_.count(), bottom_data, temp_.cpu_data(), top_data);  // X-EX

  if (this->layer_param_.mvn_param().normalize_variance()) {
    // compute variance using var(X) = E((X-EX)^2)
    caffe_powx(bottom[0]->count(), top_data, Dtype(2),
        temp_.mutable_cpu_data());  // (X-EX)^2
    caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1. / dim, temp_.cpu_data(),
        sum_multiplier_.cpu_data(), 0.,
        variance_.mutable_cpu_data());  // E((X-EX)^2)

    // normalize variance
    caffe_powx(variance_.count(), variance_.cpu_data(), Dtype(0.5),
          variance_.mutable_cpu_data());

    caffe_add_scalar(variance_.count(), eps_, variance_.mutable_cpu_data());

    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, 1.,
          variance_.cpu_data(), sum_multiplier_.cpu_data(), 0.,
          temp_.mutable_cpu_data());

    caffe_div(temp_.count(), top_data, temp_.cpu_data(), top_data);
  }
}
Example 18
void EltwiseLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom) {
  const int count = top[0]->count();
  const int* mask = NULL;
  const Dtype* top_data = top[0]->cpu_data();
  const Dtype* top_diff = top[0]->cpu_diff();
  for (int i = 0; i < bottom->size(); ++i) {
    if (propagate_down[i]) {
      const Dtype* bottom_data = (*bottom)[i]->cpu_data();
      Dtype* bottom_diff = (*bottom)[i]->mutable_cpu_diff();
      switch (op_) {
      case EltwiseParameter_EltwiseOp_PROD:
        caffe_div(count, top_data, bottom_data, bottom_diff);
        caffe_mul(count, bottom_diff, top_diff, bottom_diff);
        break;
      case EltwiseParameter_EltwiseOp_SUM:
        if (coeffs_[i] == Dtype(1)) {
          caffe_copy(count, top_diff, bottom_diff);
        } else {
          caffe_cpu_scale(count, coeffs_[i], top_diff, bottom_diff);
        }
        break;
      case EltwiseParameter_EltwiseOp_MAX:
        mask = max_idx_.cpu_data();
        for (int index = 0; index < count; ++index) {
          Dtype gradient = 0;
          if (mask[index] == i) {
            gradient += top_diff[index];
          }
          bottom_diff[index] = gradient;
        }
        break;
      default:
        LOG(FATAL) << "Unknown elementwise operation.";
      }
    }
  }
}
Example 19
void SoftmaxLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  Dtype* scale_data = scale_.mutable_cpu_data();
  int channels = bottom[0]->shape(softmax_axis_);
  int dim = bottom[0]->count() / outer_num_;
  caffe_copy(bottom[0]->count(), bottom_data, top_data);
  // We need to subtract the max to avoid numerical issues, compute the exp,
  // and then normalize.
  for (int i = 0; i < outer_num_; ++i) {
    // Note: inner_num_ and dim are not equal; dim is channels times inner_num_.
    // initialize scale_data to the first plane
    caffe_copy(inner_num_, bottom_data + i * dim, scale_data);
    for (int j = 0; j < channels; j++) {
      for (int k = 0; k < inner_num_; k++) {
        scale_data[k] = std::max(scale_data[k],
            bottom_data[i * dim + j * inner_num_ + k]);
      }
    } // For each image this takes the maximum value along the channel axis. Although the buffer behind scale_data could hold outer_num_ x 1 x inner_num_ values, only its first inner_num_ elements are updated each pass; scale_ is just scratch storage for intermediates.

    // In the caffe_cpu_gemm call, channels x inner_num_ = dim elements starting at top_data are updated; the other calls behave similarly.
    // subtraction
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, channels, inner_num_,
        1, -1., sum_multiplier_.cpu_data(), scale_data, 1., top_data);
    // exponentiation
    caffe_exp<Dtype>(dim, top_data, top_data);
    // sum after exp
    caffe_cpu_gemv<Dtype>(CblasTrans, channels, inner_num_, 1.,
        top_data, sum_multiplier_.cpu_data(), 0., scale_data);
    // division
    for (int j = 0; j < channels; j++) {
      caffe_div(inner_num_, top_data, scale_data, top_data);
      // advance the top_data pointer
      top_data += inner_num_;
    }
  }
}
void BinomialDevianceLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {

  Dtype alpha = this->layer_param_.binomial_deviance_loss_param().alpha(); 
 
  if (propagate_down[0]) { 
    Dtype* bout = bottom[0]->mutable_cpu_diff();

    int num = bottom[0]->num();
    for (int i = 0 ; i < num; i++){
      bout[i] = Dtype(1.0); 
    }
    caffe_cpu_axpby(bottom[0]->num(), Dtype(1), exp_.cpu_data(), Dtype(1), bout);
    caffe_div(bottom[0]->num(), exp_.cpu_data(), bout, bout);
    caffe_mul(bottom[0]->num(), M_.cpu_data(), bout, bout);
    caffe_mul(bottom[0]->num(), W_.cpu_data(), bout, bout);   
    caffe_cpu_axpby(bottom[0]->num(), Dtype(0.0), bout, Dtype(-alpha * top[0]->cpu_diff()[0]), bout);

    // multiply by elimination array
    caffe_mul(bottom[2]->num(), bottom[2]->cpu_data(), bottom[0]->cpu_diff(), bout);
  }
}
Example 21
  void BNLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down,
      const vector<Blob<Dtype>*>& bottom) {
    const Dtype* top_diff = top[0]->cpu_diff();
    const Dtype* bottom_data = bottom[0]->cpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();

    Dtype* scale_diff = this->blobs_[0]->mutable_cpu_diff();
    Dtype* shift_diff = this->blobs_[1]->mutable_cpu_diff();
    const Dtype* scale_data = this->blobs_[0]->cpu_data();

    switch (this->layer_param_.bn_param().bn_mode()) {
    case BNParameter_BNMode_LEARN:
      // Propagate layer to parameters
      // gradient w.r.t. scale
      caffe_mul(buffer_blob_.count(), x_norm_.cpu_data(),
          top_diff, buffer_blob_.mutable_cpu_data());
      // EX across spatial
      caffe_cpu_gemv<Dtype>(CblasNoTrans, N_ * C_,
          H_ * W_, Dtype(1), buffer_blob_.cpu_data(),
          spatial_sum_multiplier_.cpu_data(), Dtype(0),
          spatial_variance_.mutable_cpu_diff());
      // EX across batch
      caffe_cpu_gemv<Dtype>(CblasTrans, N_, C_, Dtype(1),
          spatial_variance_.cpu_diff(),
          batch_sum_multiplier_.cpu_data(), Dtype(0), scale_diff);

      // gradient w.r.t. shift
      // EX across spatial
      caffe_cpu_gemv<Dtype>(CblasNoTrans, N_ * C_,
          H_ * W_, Dtype(1), top_diff,
          spatial_sum_multiplier_.cpu_data(),
          Dtype(0), spatial_mean_.mutable_cpu_diff());
      // EX across batch
      caffe_cpu_gemv<Dtype>(CblasTrans, N_, C_,
          Dtype(1), spatial_mean_.cpu_diff(),
          batch_sum_multiplier_.cpu_data(),
          Dtype(0), shift_diff);

      // Propagate down

      // put scale * top_diff to buffer_blob_
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
          batch_sum_multiplier_.cpu_data(), scale_data, Dtype(0),
          spatial_variance_.mutable_cpu_data());
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_ * C_,
          H_ * W_, 1, Dtype(1),
          spatial_variance_.cpu_data(),
          spatial_sum_multiplier_.cpu_data(), Dtype(0),
          buffer_blob_.mutable_cpu_data());
      caffe_mul(buffer_blob_.count(), top_diff, buffer_blob_.cpu_data(),
          buffer_blob_.mutable_cpu_data());

      // use new top diff for computation
      caffe_mul(buffer_blob_.count(),  x_norm_.cpu_data(),
          buffer_blob_.cpu_data(), bottom_diff);
      // EX across spatial
      caffe_cpu_gemv<Dtype>(CblasNoTrans, N_ * C_, H_ * W_,
          Dtype(1), bottom_diff,
          spatial_sum_multiplier_.cpu_data(), Dtype(0),
          spatial_mean_.mutable_cpu_data());
      // EX across batch
      caffe_cpu_gemv<Dtype>(CblasTrans, N_, C_, Dtype(1),
          spatial_mean_.cpu_data(),
          batch_sum_multiplier_.cpu_data(), Dtype(0),
          batch_mean_.mutable_cpu_data());

      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans,
          N_, C_, 1, Dtype(1),
          batch_sum_multiplier_.cpu_data(),
          batch_mean_.cpu_data(), Dtype(0),
          spatial_mean_.mutable_cpu_data());
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_ * C_,
          H_ * W_, 1, Dtype(1),
          spatial_mean_.cpu_data(),
          spatial_sum_multiplier_.cpu_data(), Dtype(0),
          bottom_diff);

      caffe_mul(buffer_blob_.count(),
          x_norm_.cpu_data(), bottom_diff, bottom_diff);

      // EX across spatial
      caffe_cpu_gemv<Dtype>(CblasNoTrans, N_ * C_,
          H_ * W_, Dtype(1), buffer_blob_.cpu_data(),
          spatial_sum_multiplier_.cpu_data(), Dtype(0),
          spatial_mean_.mutable_cpu_data());
      // EX across batch
      caffe_cpu_gemv<Dtype>(CblasTrans, N_, C_, Dtype(1),
          spatial_mean_.cpu_data(),
          batch_sum_multiplier_.cpu_data(), Dtype(0),
          batch_mean_.mutable_cpu_data());

      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans,
          N_, C_, 1, Dtype(1),
          batch_sum_multiplier_.cpu_data(),
          batch_mean_.cpu_data(), Dtype(0),
          spatial_mean_.mutable_cpu_data());
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans,
          N_ * C_, H_ * W_, 1, Dtype(1),
          spatial_mean_.cpu_data(),
          spatial_sum_multiplier_.cpu_data(), Dtype(1), bottom_diff);

      caffe_cpu_axpby(buffer_blob_.count(), Dtype(1),
          buffer_blob_.cpu_data(), Dtype(-1. / (N_ * H_ * W_)),
          bottom_diff);

      // put the squares of bottom into buffer_blob_
      caffe_powx(buffer_blob_.count(), bottom_data, Dtype(2),
          buffer_blob_.mutable_cpu_data());

      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans,
          N_, C_, 1, Dtype(1),
          batch_sum_multiplier_.cpu_data(),
          batch_variance_.cpu_data(), Dtype(0),
          spatial_variance_.mutable_cpu_data());
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans,
          N_ * C_, H_ * W_, 1, Dtype(1),
          spatial_variance_.cpu_data(),
          spatial_sum_multiplier_.cpu_data(), Dtype(0),
          buffer_blob_.mutable_cpu_data());

      caffe_div(buffer_blob_.count(), bottom_diff,
          buffer_blob_.cpu_data(), bottom_diff);
      break;
    case BNParameter_BNMode_INFERENCE:
      // Propagate layer to parameters
      // gradient w.r.t. scale
      caffe_mul(buffer_blob_.count(), bottom_data,
          top_diff, buffer_blob_.mutable_cpu_data());
      // EX across spatial
      caffe_cpu_gemv<Dtype>(CblasNoTrans, N_ * C_,
          H_ * W_, Dtype(1), buffer_blob_.cpu_data(),
          spatial_sum_multiplier_.cpu_data(), Dtype(0),
          spatial_variance_.mutable_cpu_diff());
      // EX across batch
      caffe_cpu_gemv<Dtype>(CblasTrans, N_, C_, Dtype(1),
          spatial_variance_.cpu_diff(),
          batch_sum_multiplier_.cpu_data(), Dtype(0), scale_diff);

      // gradient w.r.t. shift
      // EX across spatial
      caffe_cpu_gemv<Dtype>(CblasNoTrans, N_ * C_,
          H_ * W_, Dtype(1), top_diff,
          spatial_sum_multiplier_.cpu_data(),
          Dtype(0), spatial_mean_.mutable_cpu_diff());
      // EX across batch
      caffe_cpu_gemv<Dtype>(CblasTrans, N_, C_,
          Dtype(1), spatial_mean_.cpu_diff(),
          batch_sum_multiplier_.cpu_data(),
          Dtype(0), shift_diff);

      // Propagate down
      // put scale * top_diff to buffer_blob_
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
          batch_sum_multiplier_.cpu_data(), scale_data, Dtype(0),
          spatial_variance_.mutable_cpu_data());
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_ * C_,
          H_ * W_, 1, Dtype(1),
          spatial_variance_.cpu_data(),
          spatial_sum_multiplier_.cpu_data(), Dtype(0),
          buffer_blob_.mutable_cpu_data());
      caffe_mul(buffer_blob_.count(), top_diff, buffer_blob_.cpu_data(),
          bottom_diff);
      break;
    default:
      LOG(FATAL) << "Unknown BN mode.";
    }
  }
Example 22
void BatchNormLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  int num = bottom[0]->shape(0);
  int spatial_dim = bottom[0]->count()/(bottom[0]->shape(0)*channels_);

  if (bottom[0] != top[0]) {
    caffe_copy(bottom[0]->count(), bottom_data, top_data);
  }

  if (use_global_stats_) {
    // use the stored mean/variance estimates.
    const Dtype scale_factor = this->blobs_[2]->cpu_data()[0] == 0 ?
        0 : 1 / this->blobs_[2]->cpu_data()[0];
    caffe_cpu_scale(variance_.count(), scale_factor,
        this->blobs_[0]->cpu_data(), mean_.mutable_cpu_data());
    caffe_cpu_scale(variance_.count(), scale_factor,
        this->blobs_[1]->cpu_data(), variance_.mutable_cpu_data());
  } else {
    // compute mean
    caffe_cpu_gemv<Dtype>(CblasNoTrans, channels_ * num, spatial_dim,
        1. / (num * spatial_dim), bottom_data,
        spatial_sum_multiplier_.cpu_data(), 0.,
        num_by_chans_.mutable_cpu_data());
    caffe_cpu_gemv<Dtype>(CblasTrans, num, channels_, 1.,
        num_by_chans_.cpu_data(), batch_sum_multiplier_.cpu_data(), 0.,
        mean_.mutable_cpu_data());
  }

  // subtract mean
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, channels_, 1, 1,
      batch_sum_multiplier_.cpu_data(), mean_.cpu_data(), 0.,
      num_by_chans_.mutable_cpu_data());
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, channels_ * num,
      spatial_dim, 1, -1, num_by_chans_.cpu_data(),
      spatial_sum_multiplier_.cpu_data(), 1., top_data);

  if (!use_global_stats_) {
    // compute variance using var(X) = E((X-EX)^2)
    caffe_powx(top[0]->count(), top_data, Dtype(2),
        temp_.mutable_cpu_data());  // (X-EX)^2
    caffe_cpu_gemv<Dtype>(CblasNoTrans, channels_ * num, spatial_dim,
        1. / (num * spatial_dim), temp_.cpu_data(),
        spatial_sum_multiplier_.cpu_data(), 0.,
        num_by_chans_.mutable_cpu_data());
    caffe_cpu_gemv<Dtype>(CblasTrans, num, channels_, 1.,
        num_by_chans_.cpu_data(), batch_sum_multiplier_.cpu_data(), 0.,
        variance_.mutable_cpu_data());  // E((X_EX)^2)

    // compute and save moving average
    this->blobs_[2]->mutable_cpu_data()[0] *= moving_average_fraction_;
    this->blobs_[2]->mutable_cpu_data()[0] += 1;
    caffe_cpu_axpby(mean_.count(), Dtype(1), mean_.cpu_data(),
        moving_average_fraction_, this->blobs_[0]->mutable_cpu_data());
    int m = bottom[0]->count()/channels_;
    Dtype bias_correction_factor = m > 1 ? Dtype(m)/(m-1) : 1;
    caffe_cpu_axpby(variance_.count(), bias_correction_factor,
        variance_.cpu_data(), moving_average_fraction_,
        this->blobs_[1]->mutable_cpu_data());
  }

  // normalize variance
  caffe_add_scalar(variance_.count(), eps_, variance_.mutable_cpu_data());
  caffe_powx(variance_.count(), variance_.cpu_data(), Dtype(0.5),
             variance_.mutable_cpu_data());

  // replicate variance to input size
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, channels_, 1, 1,
      batch_sum_multiplier_.cpu_data(), variance_.cpu_data(), 0.,
      num_by_chans_.mutable_cpu_data());
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, channels_ * num,
      spatial_dim, 1, 1., num_by_chans_.cpu_data(),
      spatial_sum_multiplier_.cpu_data(), 0., temp_.mutable_cpu_data());
  caffe_div(temp_.count(), top_data, temp_.cpu_data(), top_data);
  // TODO(cdoersch): The caching is only needed because later in-place layers
  //                 might clobber the data.  Can we skip this if they won't?
  caffe_copy(x_norm_.count(), top_data,
      x_norm_.mutable_cpu_data());
}
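
A minimal sketch, under the assumed semantics of blobs_[0..2], of the running-statistics bookkeeping above: the running sums are decayed by moving_average_fraction_ and divided by the accumulated count in blobs_[2] when use_global_stats_ is set (the scale_factor at the top of the function):

// Illustrative only; names and structure are not part of the layer.
struct RunningStat {
  double sum = 0.0;    // plays the role of blobs_[0] (mean) or blobs_[1] (variance)
  double count = 0.0;  // plays the role of blobs_[2][0]
  void update(double batch_stat, double maf) {
    count = maf * count + 1.0;       // blobs_[2][0] *= maf; += 1
    sum = maf * sum + batch_stat;    // axpby(1, batch_stat, maf, sum)
  }
  double value() const {             // what the use_global_stats_ branch reads back
    return count == 0.0 ? 0.0 : sum / count;
  }
};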
Example 23
  void BNLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
    const Dtype* bottom_data = bottom[0]->cpu_data();
    Dtype* top_data = top[0]->mutable_cpu_data();
    const Dtype* const_top_data = top[0]->cpu_data();

    const Dtype* scale_data = this->blobs_[0]->cpu_data();
    const Dtype* shift_data = this->blobs_[1]->cpu_data();

    switch (this->layer_param_.bn_param().bn_mode()) {
    case BNParameter_BNMode_LEARN:
      // put the squares of bottom into buffer_blob_
      caffe_powx(bottom[0]->count(), bottom_data, Dtype(2),
          buffer_blob_.mutable_cpu_data());

      // computes variance using var(X) = E(X^2) - (EX)^2
      // EX across spatial
      caffe_cpu_gemv<Dtype>(CblasNoTrans, N_ * C_, H_ * W_,
          Dtype(1. / (H_ * W_)), bottom_data,
          spatial_sum_multiplier_.cpu_data(), Dtype(0),
          spatial_mean_.mutable_cpu_data());
      // EX across batch
      caffe_cpu_gemv<Dtype>(CblasTrans, N_, C_, Dtype(1. / N_),
          spatial_mean_.cpu_data(),
          batch_sum_multiplier_.cpu_data(), Dtype(0),
          batch_mean_.mutable_cpu_data());

      // E(X^2) across spatial
      caffe_cpu_gemv<Dtype>(CblasNoTrans, N_ * C_, H_ * W_,
          Dtype(1. / (H_ * W_)), buffer_blob_.cpu_data(),
          spatial_sum_multiplier_.cpu_data(), Dtype(0),
          spatial_variance_.mutable_cpu_data());
      // E(X^2) across batch
      caffe_cpu_gemv<Dtype>(CblasTrans, N_, C_, Dtype(1. / N_),
          spatial_variance_.cpu_data(),
          batch_sum_multiplier_.cpu_data(), Dtype(0),
          batch_variance_.mutable_cpu_data());

      caffe_powx(batch_mean_.count(), batch_mean_.cpu_data(), Dtype(2),
          buffer_blob_.mutable_cpu_data());  // (EX)^2
      caffe_sub(batch_mean_.count(), batch_variance_.cpu_data(),
          buffer_blob_.cpu_data(),
          batch_variance_.mutable_cpu_data());  // variance

      // save top[1] (batch_mean) and top[2] (batch_variance)
      if (top.size() > 1) {
          caffe_copy(batch_mean_.count(), batch_mean_.cpu_data(),
              top[1]->mutable_cpu_data());
      }
      if (top.size() > 2) {
          caffe_copy(batch_variance_.count(), batch_variance_.cpu_data(),
              top[2]->mutable_cpu_data());
      }

      // do mean and variance normalization
      // subtract mean
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_,
          C_, 1, Dtype(1),
          batch_sum_multiplier_.cpu_data(),
          batch_mean_.cpu_data(), Dtype(0),
          spatial_mean_.mutable_cpu_data());

      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_ * C_,
          H_ * W_, 1, Dtype(-1),
          spatial_mean_.cpu_data(),
          spatial_sum_multiplier_.cpu_data(), Dtype(0),
          buffer_blob_.mutable_cpu_data());

      caffe_add(buffer_blob_.count(), bottom_data,
          buffer_blob_.cpu_data(), top_data);

      // normalize variance
      caffe_add_scalar(batch_variance_.count(), var_eps_,
        batch_variance_.mutable_cpu_data());
      caffe_powx(batch_variance_.count(),
          batch_variance_.cpu_data(), Dtype(0.5),
          batch_variance_.mutable_cpu_data());

      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_,
          C_, 1, Dtype(1),
          batch_sum_multiplier_.cpu_data(),
          batch_variance_.cpu_data(), Dtype(0),
          spatial_variance_.mutable_cpu_data());
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans,
          N_ * C_, H_ * W_, 1, Dtype(1),
          spatial_variance_.cpu_data(),
          spatial_sum_multiplier_.cpu_data(), Dtype(0),
          buffer_blob_.mutable_cpu_data());

      caffe_div(buffer_blob_.count(), const_top_data,
          buffer_blob_.cpu_data(), top_data);

      // Saving x_norm
      caffe_copy(buffer_blob_.count(), const_top_data,
          x_norm_.mutable_cpu_data());
      // scale
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
          batch_sum_multiplier_.cpu_data(), scale_data, Dtype(0),
          spatial_variance_.mutable_cpu_data());
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_ * C_,
          H_ * W_, 1, Dtype(1),
          spatial_variance_.cpu_data(),
          spatial_sum_multiplier_.cpu_data(), Dtype(0),
          buffer_blob_.mutable_cpu_data());
      caffe_mul(buffer_blob_.count(), top_data,
          buffer_blob_.cpu_data(), top_data);

      // shift
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
          batch_sum_multiplier_.cpu_data(), shift_data, Dtype(0),
          spatial_mean_.mutable_cpu_data());
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans,
          N_ * C_, H_ * W_, 1, Dtype(1),
          spatial_mean_.cpu_data(),
          spatial_sum_multiplier_.cpu_data(), Dtype(0),
          buffer_blob_.mutable_cpu_data());
      caffe_add(buffer_blob_.count(), const_top_data,
          buffer_blob_.cpu_data(), top_data);
      break;
    case BNParameter_BNMode_INFERENCE:
      // scale
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
          batch_sum_multiplier_.cpu_data(), scale_data, Dtype(0),
          spatial_variance_.mutable_cpu_data());
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_ * C_,
          H_ * W_, 1, Dtype(1),
          spatial_variance_.cpu_data(),
          spatial_sum_multiplier_.cpu_data(), Dtype(0),
          buffer_blob_.mutable_cpu_data());
      caffe_mul(buffer_blob_.count(), bottom_data,
          buffer_blob_.cpu_data(), top_data);

      // shift
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
          batch_sum_multiplier_.cpu_data(), shift_data, Dtype(0),
          spatial_mean_.mutable_cpu_data());
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans,
          N_ * C_, H_ * W_, 1, Dtype(1),
          spatial_mean_.cpu_data(),
          spatial_sum_multiplier_.cpu_data(), Dtype(0),
          buffer_blob_.mutable_cpu_data());
      caffe_add(buffer_blob_.count(), const_top_data,
          buffer_blob_.cpu_data(), top_data);
      break;
    default:
      LOG(FATAL) << "Unknown BN mode.";
    } 
  }
Example 24
void BNLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
  const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  const Dtype* const_bottom_diff = bottom[0]->cpu_diff();
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
  const Dtype* const_top_diff = top[0]->cpu_diff();

  Dtype* scale_diff = this->blobs_[0]->mutable_cpu_diff();
  Dtype* shift_diff = this->blobs_[1]->mutable_cpu_diff();
  const Dtype* scale_data = this->blobs_[0]->cpu_data();

  // gradient w.r.t. slope
  caffe_mul(broadcast_buffer_.count(), x_norm_.cpu_data(), const_top_diff,
      broadcast_buffer_.mutable_cpu_data());
  caffe_cpu_gemv<Dtype>(CblasNoTrans, num_ * channels_, height_ * width_,
      Dtype(1), broadcast_buffer_.cpu_data(),
      spatial_sum_multiplier_.cpu_data(), Dtype(0),
      spatial_statistic_.mutable_cpu_data());
  caffe_cpu_gemv<Dtype>(CblasTrans, num_, channels_, Dtype(1),
      spatial_statistic_.cpu_data(), batch_sum_multiplier_.cpu_data(),
      Dtype(1), scale_diff);

  // gradient w.r.t. bias
  caffe_cpu_gemv<Dtype>(CblasNoTrans, num_ * channels_, height_ * width_,
      Dtype(1), const_top_diff, spatial_sum_multiplier_.cpu_data(),
      Dtype(0), spatial_statistic_.mutable_cpu_data());
  caffe_cpu_gemv<Dtype>(CblasTrans, num_, channels_, Dtype(1),
      spatial_statistic_.cpu_data(), batch_sum_multiplier_.cpu_data(),
      Dtype(1), shift_diff);

  // gradient w.r.t. normalized inputs
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1,
      Dtype(1), batch_sum_multiplier_.cpu_data(), scale_data,
      Dtype(0), spatial_statistic_.mutable_cpu_data());
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_ * channels_,
      height_ * width_, 1, Dtype(1), spatial_statistic_.cpu_data(),
      spatial_sum_multiplier_.cpu_data(), Dtype(0),
      broadcast_buffer_.mutable_cpu_data());
  caffe_mul(broadcast_buffer_.count(), const_top_diff,
      broadcast_buffer_.cpu_data(), broadcast_buffer_.mutable_cpu_data());

  // sum of x_hat * (dl / dx_hat)
  caffe_mul(broadcast_buffer_.count(), x_norm_.cpu_data(),
      broadcast_buffer_.cpu_data(), bottom_diff);
  caffe_cpu_gemv<Dtype>(CblasNoTrans, num_ * channels_, height_ * width_,
      Dtype(1), const_bottom_diff, spatial_sum_multiplier_.cpu_data(),
      Dtype(0), spatial_statistic_.mutable_cpu_data());
  caffe_cpu_gemv<Dtype>(CblasTrans, num_, channels_, Dtype(1),
      spatial_statistic_.cpu_data(), batch_sum_multiplier_.cpu_data(),
      Dtype(0), batch_statistic_.mutable_cpu_data());

  // x_hat times the sum
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1,
      Dtype(1), batch_sum_multiplier_.cpu_data(), batch_statistic_.cpu_data(),
      Dtype(0), spatial_statistic_.mutable_cpu_data());
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_ * channels_,
      height_ * width_, 1, Dtype(1),
      spatial_statistic_.cpu_data(), spatial_sum_multiplier_.cpu_data(),
      Dtype(0), bottom_diff);
  caffe_mul(broadcast_buffer_.count(), x_norm_.cpu_data(),
      const_bottom_diff, bottom_diff);

  // Subtract the average of x_hat times the sum
  caffe_cpu_gemv<Dtype>(CblasNoTrans, num_ * channels_, height_ * width_,
      Dtype(1), broadcast_buffer_.cpu_data(),
      spatial_sum_multiplier_.cpu_data(), Dtype(0),
      spatial_statistic_.mutable_cpu_data());
  caffe_cpu_gemv<Dtype>(CblasTrans, num_, channels_, Dtype(1),
      spatial_statistic_.cpu_data(), batch_sum_multiplier_.cpu_data(),
      Dtype(0), batch_statistic_.mutable_cpu_data());
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1,
      Dtype(1), batch_sum_multiplier_.cpu_data(), batch_statistic_.cpu_data(),
      Dtype(0), spatial_statistic_.mutable_cpu_data());
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_ * channels_,
      height_ * width_, 1, Dtype(1),
      spatial_statistic_.cpu_data(), spatial_sum_multiplier_.cpu_data(),
      Dtype(1), bottom_diff);
  caffe_cpu_axpby(broadcast_buffer_.count(), Dtype(1),
      broadcast_buffer_.cpu_data(), Dtype(-1) / (num_ * height_ * width_),
      bottom_diff);

  // Divide by the std
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1,
      Dtype(1), batch_sum_multiplier_.cpu_data(), x_std_.cpu_data(),
      Dtype(0), spatial_statistic_.mutable_cpu_data());
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_ * channels_,
      height_ * width_, 1, Dtype(1),
      spatial_statistic_.cpu_data(), spatial_sum_multiplier_.cpu_data(),
      Dtype(0), broadcast_buffer_.mutable_cpu_data());
  caffe_div(broadcast_buffer_.count(), const_bottom_diff,
      broadcast_buffer_.cpu_data(), bottom_diff);
}
Example 25
void BatchNormLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  const Dtype* top_diff;
  if (bottom[0] != top[0]) {
    top_diff = top[0]->cpu_diff();
  } else {
    caffe_copy(x_norm_.count(), top[0]->cpu_diff(), x_norm_.mutable_cpu_diff());
    top_diff = x_norm_.cpu_diff();
  }
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
  if (use_global_stats_) {
    caffe_div(temp_.count(), top_diff, temp_.cpu_data(), bottom_diff);
    return;
  }
  const Dtype* top_data = x_norm_.cpu_data();
  int num = bottom[0]->shape()[0];
  int spatial_dim = bottom[0]->count()/(bottom[0]->shape(0)*channels_);
  // if Y = (X-mean(X))/(sqrt(var(X)+eps)), then
  //
  // dE(Y)/dX =
  //   (dE/dY - mean(dE/dY) - mean(dE/dY \cdot Y) \cdot Y)
  //     ./ sqrt(var(X) + eps)
  //
  // where \cdot and ./ are hadamard product and elementwise division,
  // respectively, dE/dY is the top diff, and mean/var/sum are all computed
  // along all dimensions except the channels dimension.  In the above
  // equation, the operations allow for expansion (i.e. broadcast) along all
  // dimensions except the channels dimension where required.

  // sum(dE/dY \cdot Y)
  caffe_mul(temp_.count(), top_data, top_diff, bottom_diff);
  caffe_cpu_gemv<Dtype>(CblasNoTrans, channels_ * num, spatial_dim, 1.,
      bottom_diff, spatial_sum_multiplier_.cpu_data(), 0.,
      num_by_chans_.mutable_cpu_data());
  caffe_cpu_gemv<Dtype>(CblasTrans, num, channels_, 1.,
      num_by_chans_.cpu_data(), batch_sum_multiplier_.cpu_data(), 0.,
      mean_.mutable_cpu_data());

  // reshape (broadcast) the above
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, channels_, 1, 1,
      batch_sum_multiplier_.cpu_data(), mean_.cpu_data(), 0.,
      num_by_chans_.mutable_cpu_data());
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, channels_ * num,
      spatial_dim, 1, 1., num_by_chans_.cpu_data(),
      spatial_sum_multiplier_.cpu_data(), 0., bottom_diff);

  // sum(dE/dY \cdot Y) \cdot Y
  caffe_mul(temp_.count(), top_data, bottom_diff, bottom_diff);

  // sum(dE/dY)-sum(dE/dY \cdot Y) \cdot Y
  caffe_cpu_gemv<Dtype>(CblasNoTrans, channels_ * num, spatial_dim, 1.,
      top_diff, spatial_sum_multiplier_.cpu_data(), 0.,
      num_by_chans_.mutable_cpu_data());
  caffe_cpu_gemv<Dtype>(CblasTrans, num, channels_, 1.,
      num_by_chans_.cpu_data(), batch_sum_multiplier_.cpu_data(), 0.,
      mean_.mutable_cpu_data());
  // reshape (broadcast) the above to make
  // sum(dE/dY)-sum(dE/dY \cdot Y) \cdot Y
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, channels_, 1, 1,
      batch_sum_multiplier_.cpu_data(), mean_.cpu_data(), 0.,
      num_by_chans_.mutable_cpu_data());
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num * channels_,
      spatial_dim, 1, 1., num_by_chans_.cpu_data(),
      spatial_sum_multiplier_.cpu_data(), 1., bottom_diff);

  // dE/dY - mean(dE/dY)-mean(dE/dY \cdot Y) \cdot Y
  caffe_cpu_axpby(temp_.count(), Dtype(1), top_diff,
      Dtype(-1. / (num * spatial_dim)), bottom_diff);

  // note: temp_ still contains sqrt(var(X)+eps), computed during the forward
  // pass.
  caffe_div(temp_.count(), bottom_diff, temp_.cpu_data(), bottom_diff);
}
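
A direct 1-D transcription of the gradient formula from the comment above, dE/dX = (dE/dY - mean(dE/dY) - mean(dE/dY .* Y) .* Y) / sqrt(var(X) + eps), for a single channel; illustrative only (assumes Y and 1/sqrt(var + eps) were cached from the forward pass):

#include <vector>

// dy: top diff dE/dY; y: normalized output Y; inv_std: 1 / sqrt(var(X) + eps).
std::vector<double> bn_backward_1d(const std::vector<double>& dy,
                                   const std::vector<double>& y,
                                   double inv_std) {
  const size_t n = dy.size();
  double mean_dy = 0.0, mean_dy_y = 0.0;
  for (size_t i = 0; i < n; ++i) {
    mean_dy += dy[i];
    mean_dy_y += dy[i] * y[i];
  }
  mean_dy /= n;
  mean_dy_y /= n;
  std::vector<double> dx(n);
  for (size_t i = 0; i < n; ++i) {
    dx[i] = (dy[i] - mean_dy - mean_dy_y * y[i]) * inv_std;
  }
  return dx;
}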
Example 26
void AdaGradSolver<Dtype>::ComputeUpdateValue() {
  vector<shared_ptr<Blob<Dtype> > >& net_params = this->net_->params();
  vector<float>& net_params_lr = this->net_->params_lr();
  vector<float>& net_params_weight_decay = this->net_->params_weight_decay();
  // get the learning rate
  Dtype rate = this->GetLearningRate();
  Dtype delta = this->param_.delta();
  if (this->param_.display() && this->iter_ % this->param_.display() == 0) {
    LOG(INFO) << "Iteration " << this->iter_ << ", lr = " << rate;
  }
  Dtype weight_decay = this->param_.weight_decay();
  string regularization_type = this->param_.regularization_type();
  switch (Caffe::mode()) {
  case Caffe::CPU:
    for (int param_id = 0; param_id < net_params.size(); ++param_id) {
      Dtype local_rate = rate * net_params_lr[param_id];
      Dtype local_decay = weight_decay * net_params_weight_decay[param_id];

      if (local_decay) {
        if (regularization_type == "L2") {
          // add weight decay
          caffe_axpy(net_params[param_id]->count(),
              local_decay,
              net_params[param_id]->cpu_data(),
              net_params[param_id]->mutable_cpu_diff());
        } else if (regularization_type == "L1") {
          caffe_cpu_sign(net_params[param_id]->count(),
              net_params[param_id]->cpu_data(),
              this->temp_[param_id]->mutable_cpu_data());
          caffe_axpy(net_params[param_id]->count(),
              local_decay,
              this->temp_[param_id]->cpu_data(),
              net_params[param_id]->mutable_cpu_diff());
        } else {
          LOG(FATAL) << "Unknown regularization type: " << regularization_type;
        }
      }

      // compute square of gradient in update
      caffe_powx(net_params[param_id]->count(),
          net_params[param_id]->cpu_diff(), Dtype(2),
          this->update_[param_id]->mutable_cpu_data());

      // update history
      caffe_add(net_params[param_id]->count(),
          this->update_[param_id]->cpu_data(),
          this->history_[param_id]->cpu_data(),
          this->history_[param_id]->mutable_cpu_data());

      // prepare update
      caffe_powx(net_params[param_id]->count(),
                this->history_[param_id]->cpu_data(), Dtype(0.5),
                this->update_[param_id]->mutable_cpu_data());

      caffe_add_scalar(net_params[param_id]->count(),
                delta, this->update_[param_id]->mutable_cpu_data());

      caffe_div(net_params[param_id]->count(),
                net_params[param_id]->cpu_diff(),
                this->update_[param_id]->cpu_data(),
                this->update_[param_id]->mutable_cpu_data());

      // scale and copy
      caffe_cpu_axpby(net_params[param_id]->count(), local_rate,
          this->update_[param_id]->cpu_data(), Dtype(0),
          net_params[param_id]->mutable_cpu_diff());
    }
    break;
  case Caffe::GPU:
#ifndef CPU_ONLY
    for (int param_id = 0; param_id < net_params.size(); ++param_id) {
      Dtype local_rate = rate * net_params_lr[param_id];
      Dtype local_decay = weight_decay * net_params_weight_decay[param_id];

      if (local_decay) {
        if (regularization_type == "L2") {
          // add weight decay
          caffe_gpu_axpy(net_params[param_id]->count(),
              local_decay,
              net_params[param_id]->gpu_data(),
              net_params[param_id]->mutable_gpu_diff());
        } else if (regularization_type == "L1") {
          caffe_gpu_sign(net_params[param_id]->count(),
              net_params[param_id]->gpu_data(),
              this->temp_[param_id]->mutable_gpu_data());
          caffe_gpu_axpy(net_params[param_id]->count(),
              local_decay,
              this->temp_[param_id]->gpu_data(),
              net_params[param_id]->mutable_gpu_diff());
        } else {
          LOG(FATAL) << "Unknown regularization type: " << regularization_type;
        }
      }

      // compute square of gradient in update
      caffe_gpu_powx(net_params[param_id]->count(),
          net_params[param_id]->gpu_diff(), Dtype(2),
          this->update_[param_id]->mutable_gpu_data());

      // update history
      caffe_gpu_add(net_params[param_id]->count(),
          this->update_[param_id]->gpu_data(),
          this->history_[param_id]->gpu_data(),
          this->history_[param_id]->mutable_gpu_data());

      // prepare update
      caffe_gpu_powx(net_params[param_id]->count(),
                this->history_[param_id]->gpu_data(), Dtype(0.5),
                this->update_[param_id]->mutable_gpu_data());

      caffe_gpu_add_scalar(net_params[param_id]->count(),
                delta, this->update_[param_id]->mutable_gpu_data());

      caffe_gpu_div(net_params[param_id]->count(),
                net_params[param_id]->gpu_diff(),
                this->update_[param_id]->gpu_data(),
                this->update_[param_id]->mutable_gpu_data());

      // scale and copy
      caffe_gpu_axpby(net_params[param_id]->count(), local_rate,
          this->update_[param_id]->gpu_data(), Dtype(0),
          net_params[param_id]->mutable_gpu_diff());
    }
#else
    NO_GPU;
#endif
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
  }
}
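
For reference, both the CPU and GPU branches above implement the same per-element AdaGrad step (a short sketch in LaTeX, not part of the original source; g is the gradient after the weight-decay block, h the accumulated history, \eta the local learning rate, and \delta the stability term from the solver parameters):

\[
h \leftarrow h + g^{2}, \qquad
\Delta w = -\,\eta \,\frac{g}{\sqrt{h} + \delta}
\]

The L2 branch adds local_decay * w to the gradient first, the L1 branch adds local_decay * sign(w), and the final axpby only writes the scaled update into the diff buffer; the parameter itself is updated later when the solver calls the net's Update().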
Example #27
	template <typename Dtype>
	void DeconvNormLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
		const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom)
	{
		const Dtype* top_diff = top[0]->cpu_diff();
		Dtype* deconv1_top_vec_diff = deconv1_top_vec[0]->mutable_cpu_diff();
		Dtype* deconv2_top_vec_diff = deconv2_top_vec[0]->mutable_cpu_diff();
		const Dtype* deconv2_top_vec_data = deconv2_top_vec[0]->cpu_data();
		const Dtype* deconv1_top_vec_data = deconv1_top_vec[0]->cpu_data();

		caffe_set(deconv2_top_vec[0]->count(), (Dtype)0, deconv2_top_vec_diff);
		caffe_set(deconv1_top_vec[0]->count(), (Dtype)0, deconv1_top_vec_diff);
		caffe_set(exp_top_vec[0]->count(), (Dtype)0, exp_top_vec[0]->mutable_cpu_diff());
		//caffe_set(exp_bottom_vec[0]->count(), (Dtype)0, exp_bottom_vec[0]->mutable_cpu_diff());

		caffe_set(deconv1_layer->blobs()[0]->count(), (Dtype)0, deconv1_layer->blobs()[0]->mutable_cpu_diff());
		caffe_set(deconv2_layer->blobs()[0]->count(), (Dtype)0, deconv2_layer->blobs()[0]->mutable_cpu_diff());

		//bias gradient, if necessary
		if (this->bias_term_ && this->param_propagate_down_[2])
		{
			Dtype* bias_diff = this->blobs_[2]->mutable_cpu_diff();
			for (int n = 0; n < top[0]->num(); ++n)
			{
				caffe_cpu_gemv<Dtype>(CblasNoTrans, top[0]->channels(), top[0]->height() * top[0]->width(), 
					1., top_diff+top[0]->offset(n), bias_multiplier.cpu_data(), 1., bias_diff);
			}
		}
		// weights and alpha gradient, propagate down to bottom
		if (this->param_propagate_down_[0] || this->param_propagate_down_[1] || propagate_down[0])
		{
			vector<bool> no_propagate_down;
			no_propagate_down.push_back(false);
			vector<bool> yes_propagate_down;
			yes_propagate_down.push_back(true);
			// top_diff backward to deconv2_top_vec_diff
			for (int n = 0; n < top[0]->num(); ++n)
			{
				caffe_div(deconv1_top_vec[0]->count(), top_diff + top[0]->offset(n),
					deconv1_top_vec_data, deconv2_top_vec_diff + deconv2_top_vec[0]->offset(n));
			}
			// backward through deconv2_layer
			deconv2_layer->Backward(deconv2_top_vec, propagate_down, bottom);
			const Dtype* wa_diff = weights_alphas->cpu_diff();
			// weight gradient
			if (this->param_propagate_down_[0])
			{
				Dtype* weight_diff = this->blobs_[0]->mutable_cpu_diff();
				const Dtype* alpha = alphas->cpu_data();
				for (int ch_in = 0; ch_in < weights_alphas->num(); ++ch_in)
				{
					caffe_mul(alphas->count(), wa_diff + weights_alphas->offset(ch_in),
						alpha, weight_diff + this->blobs_[0]->offset(ch_in));
				}
			}

			// alpha gradient
			if (this->param_propagate_down_[1] && average_train)
			{
				//alpha_diff1
				Dtype* alpha_cache_diff = alpha_cache.mutable_cpu_diff();
				Dtype* alpha_cache_diff2 = alpha_cache2.mutable_cpu_diff();
				caffe_set(alpha_cache.count(), (Dtype)0, alpha_cache_diff);
				caffe_set(alpha_cache2.count(), (Dtype)0, alpha_cache_diff2);
				const Dtype* weight = this->blobs_[0]->cpu_data();
				for (int ch_in = 0; ch_in < weights_alphas->num(); ++ch_in)
				{
					caffe_mul(alpha_cache.count(), wa_diff + weights_alphas->offset(ch_in),
						weight + this->blobs_[0]->offset(ch_in), alpha_cache_diff);
					caffe_add(alpha_cache2.count(), alpha_cache_diff, alpha_cache_diff2, alpha_cache_diff2);
				}
				// top_diff backward to deconv1_top_vec_diff
				Dtype* deconv1_top_cache_diff = deconv1_top_cache.mutable_cpu_diff();
				caffe_set(deconv1_top_cache.count(), (Dtype)0, deconv1_top_cache_diff);
				for (int n = 0; n < top[0]->num(); ++n)
				{
					caffe_mul(deconv1_top_cache.count(), top_diff + top[0]->offset(n),
						deconv2_top_vec_data + deconv2_top_vec[0]->offset(n), deconv1_top_cache_diff);
					caffe_add(deconv1_top_cache.count(), deconv1_top_cache_diff, deconv1_top_vec_diff, deconv1_top_vec_diff);
				}
				caffe_div(deconv1_top_cache.count(), deconv1_top_vec_diff,
					deconv1_top_vec_data, deconv1_top_vec_diff);
				caffe_div(deconv1_top_cache.count(), deconv1_top_vec_diff,
					deconv1_top_vec_data, deconv1_top_vec_diff);
				// backward through deconv1_layer
				deconv1_layer->Backward(deconv1_top_vec, no_propagate_down, deconv1_bottom_vec);

				// alpha_diff2
				Dtype* alpha_diff = alphas->mutable_cpu_diff();
				//fuse alpha_diff1 and alpha_diff2
				caffe_sub(alpha_cache.count(), alpha_cache_diff2, alpha_diff, alpha_diff);

				exp_layer->Backward(exp_top_vec, yes_propagate_down, exp_bottom_vec);
			}
		}
	}
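
A note on the math this backward pass appears to implement (a sketch, not part of the original source): the forward pass forms the normalized deconvolution t = d2 / d1, so by the quotient rule

\[
\frac{\partial L}{\partial d_2} = \frac{1}{d_1}\,\frac{\partial L}{\partial t},
\qquad
\frac{\partial L}{\partial d_1} = -\,\frac{d_2}{d_1^{2}}\,\frac{\partial L}{\partial t}.
\]

This is why top_diff is divided by deconv1_top_vec_data once on the deconv2 path, and why the deconv1 path multiplies by deconv2_top_vec_data and then divides by deconv1_top_vec_data twice; the missing minus sign is supplied by the caffe_sub that fuses the two alpha gradients.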
Example #28
template <typename Dtype>
void BNLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
  const vector<Blob<Dtype>*>& top) {
  const Dtype* const_bottom_data = bottom[0]->cpu_data();
  const Dtype* const_top_data = top[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();

  const Dtype* scale_data = this->blobs_[0]->cpu_data();
  const Dtype* shift_data = this->blobs_[1]->cpu_data();

  // Mean normalization
  if (this->phase_ == TEST && moving_average_) {
    // Use the moving average mean
    caffe_copy(batch_statistic_.count(), this->blobs_[2]->cpu_data(),
        batch_statistic_.mutable_cpu_data());
  } else if (this->phase_ == TRAIN) {
    // Compute the mean by averaging over spatial and batch dimensions.
    caffe_cpu_gemv<Dtype>(CblasNoTrans, num_ * channels_, height_ * width_,
        Dtype(1) / (height_ * width_), const_bottom_data,
        spatial_sum_multiplier_.cpu_data(), Dtype(0),
        spatial_statistic_.mutable_cpu_data());
    caffe_cpu_gemv<Dtype>(CblasTrans, num_, channels_,
        Dtype(1) / num_, spatial_statistic_.cpu_data(),
        batch_sum_multiplier_.cpu_data(), Dtype(0),
        batch_statistic_.mutable_cpu_data());
    // Add to the moving average
    if (moving_average_) {
      caffe_cpu_axpby(batch_statistic_.count(),
          Dtype(1) - bn_momentum_, batch_statistic_.cpu_data(),
          bn_momentum_, this->blobs_[2]->mutable_cpu_data());
    }
  }
  // Broadcast the mean vector
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1,
      Dtype(1), batch_sum_multiplier_.cpu_data(), batch_statistic_.cpu_data(),
      Dtype(0), spatial_statistic_.mutable_cpu_data());
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_ * channels_,
      height_ * width_, 1, Dtype(-1),
      spatial_statistic_.cpu_data(), spatial_sum_multiplier_.cpu_data(),
      Dtype(0), broadcast_buffer_.mutable_cpu_data());
  // Subtract the mean (the broadcast buffer holds the negated mean)
  caffe_add(broadcast_buffer_.count(), const_bottom_data,
      broadcast_buffer_.cpu_data(), top_data);

  // Variance normalization
  if (this->phase_ == TEST && moving_average_) {
    // Use the moving average variance
    caffe_copy(batch_statistic_.count(), this->blobs_[3]->cpu_data(),
        batch_statistic_.mutable_cpu_data());
  } else if (this->phase_ == TRAIN) {
    caffe_powx(broadcast_buffer_.count(), const_top_data, Dtype(2),
        broadcast_buffer_.mutable_cpu_data());
    caffe_cpu_gemv<Dtype>(CblasNoTrans, num_ * channels_, height_ * width_,
        Dtype(1) / (height_ * width_), broadcast_buffer_.cpu_data(),
        spatial_sum_multiplier_.cpu_data(), Dtype(0),
        spatial_statistic_.mutable_cpu_data());
    caffe_cpu_gemv<Dtype>(CblasTrans, num_, channels_, Dtype(1) / num_,
        spatial_statistic_.cpu_data(), batch_sum_multiplier_.cpu_data(),
        Dtype(0), batch_statistic_.mutable_cpu_data());
    // Add to the moving average
    if (moving_average_) {
      caffe_cpu_axpby(batch_statistic_.count(),
          Dtype(1) - bn_momentum_, batch_statistic_.cpu_data(),
          bn_momentum_, this->blobs_[3]->mutable_cpu_data());
    }
  }
  // Add eps
  caffe_add_scalar(batch_statistic_.count(), bn_eps_,
      batch_statistic_.mutable_cpu_data());
  // Standard deviation
  caffe_powx(batch_statistic_.count(), batch_statistic_.cpu_data(),
      Dtype(0.5), batch_statistic_.mutable_cpu_data());
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1,
      Dtype(1), batch_sum_multiplier_.cpu_data(), batch_statistic_.cpu_data(),
      Dtype(0), spatial_statistic_.mutable_cpu_data());
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_ * channels_,
      height_ * width_, 1, Dtype(1),
      spatial_statistic_.cpu_data(), spatial_sum_multiplier_.cpu_data(),
      Dtype(0), broadcast_buffer_.mutable_cpu_data());
  // Divide by the std
  caffe_div(broadcast_buffer_.count(), const_top_data,
      broadcast_buffer_.cpu_data(), top_data);

  // Save the normalized inputs and std for backprop
  caffe_copy(broadcast_buffer_.count(), const_top_data,
      x_norm_.mutable_cpu_data());
  caffe_copy(batch_statistic_.count(), batch_statistic_.cpu_data(),
      x_std_.mutable_cpu_data());

  // Scale
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1,
      Dtype(1), batch_sum_multiplier_.cpu_data(), scale_data,
      Dtype(0), spatial_statistic_.mutable_cpu_data());
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_ * channels_,
      height_ * width_, 1, Dtype(1),
      spatial_statistic_.cpu_data(), spatial_sum_multiplier_.cpu_data(),
      Dtype(0), broadcast_buffer_.mutable_cpu_data());
  caffe_mul(broadcast_buffer_.count(), const_top_data,
      broadcast_buffer_.cpu_data(), top_data);

  // Shift
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_, channels_, 1,
      Dtype(1), batch_sum_multiplier_.cpu_data(), shift_data,
      Dtype(0), spatial_statistic_.mutable_cpu_data());
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_ * channels_,
      height_ * width_, 1, Dtype(1),
      spatial_statistic_.cpu_data(), spatial_sum_multiplier_.cpu_data(),
      Dtype(0), broadcast_buffer_.mutable_cpu_data());
  caffe_add(broadcast_buffer_.count(), const_top_data,
      broadcast_buffer_.cpu_data(), top_data);
}
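
For orientation, the transform computed above is the standard batch-normalization step (a sketch; gamma and beta are this->blobs_[0] and this->blobs_[1]):

\[
\hat{x} = \frac{x - \mu_{\mathcal{B}}}{\sqrt{\sigma^{2}_{\mathcal{B}} + \epsilon}},
\qquad
y = \gamma\,\hat{x} + \beta,
\]

with the moving averages in blobs_[2] and blobs_[3] updated as moving = bn_momentum * moving + (1 - bn_momentum) * batch. The repeated pair of gemm calls only broadcasts a per-channel vector over the NCHW layout; a hypothetical standalone sketch of what that pair produces (up to the -1 scale used when subtracting the mean):

// Hypothetical sketch, not from the original source: replicate a per-channel
// statistic s[c] into an NCHW buffer, which is what the batch_sum_multiplier /
// spatial_sum_multiplier gemm pair above computes.
template <typename Dtype>
void BroadcastChannelStatistic(const Dtype* s, int num, int channels,
                               int height, int width, Dtype* buffer) {
  for (int n = 0; n < num; ++n) {
    for (int c = 0; c < channels; ++c) {
      for (int k = 0; k < height * width; ++k) {
        buffer[((n * channels) + c) * height * width + k] = s[c];
      }
    }
  }
}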
Example #29
template <typename Dtype>
void AdaGradSolver<Dtype>::ComputeUpdateValue(uint_tp param_id, Dtype rate) {
  CHECK(Caffe::root_solver());
  const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
  const vector<float>& net_params_lr = this->net_->params_lr();
  Dtype delta = this->param_.delta();
  Dtype local_rate = rate * net_params_lr[param_id];
  switch (Caffe::mode()) {
    case Caffe::CPU: {
      // compute square of gradient in update
      caffe_powx(net_params[param_id]->count(),
                 net_params[param_id]->cpu_diff(), Dtype(2),
                 this->update_[param_id]->mutable_cpu_data());

      // update history
      caffe_add(net_params[param_id]->count(),
                this->update_[param_id]->cpu_data(),
                this->history_[param_id]->cpu_data(),
                this->history_[param_id]->mutable_cpu_data());

      // prepare update
      caffe_powx(net_params[param_id]->count(),
                 this->history_[param_id]->cpu_data(), Dtype(0.5),
                 this->update_[param_id]->mutable_cpu_data());

      caffe_add_scalar(net_params[param_id]->count(), delta,
                       this->update_[param_id]->mutable_cpu_data());

      caffe_div(net_params[param_id]->count(), net_params[param_id]->cpu_diff(),
                this->update_[param_id]->cpu_data(),
                this->update_[param_id]->mutable_cpu_data());

      // scale and copy
      caffe_cpu_axpby(net_params[param_id]->count(), local_rate,
                      this->update_[param_id]->cpu_data(), Dtype(0),
                      net_params[param_id]->mutable_cpu_diff());
      break;
    }
    case Caffe::GPU: {
#ifndef CPU_ONLY
      if (this->device_->backend() == BACKEND_CUDA) {
#ifdef USE_CUDA
        // compute square of gradient in update
        caffe_gpu_powx(net_params[param_id]->count(),
                       net_params[param_id]->gpu_diff(), Dtype(2),
                       this->update_[param_id]->mutable_gpu_data());

        // update history
        caffe_gpu_add(net_params[param_id]->count(),
                      this->update_[param_id]->gpu_data(),
                      this->history_[param_id]->gpu_data(),
                      this->history_[param_id]->mutable_gpu_data());

        // prepare update
        caffe_gpu_powx(net_params[param_id]->count(),
                       this->history_[param_id]->gpu_data(), Dtype(0.5),
                       this->update_[param_id]->mutable_gpu_data());

        caffe_gpu_add_scalar(net_params[param_id]->count(), delta,
                             this->update_[param_id]->mutable_gpu_data());

        caffe_gpu_div(net_params[param_id]->count(),
                      net_params[param_id]->gpu_diff(),
                      this->update_[param_id]->gpu_data(),
                      this->update_[param_id]->mutable_gpu_data());

        // scale and copy
        caffe_gpu_axpby(net_params[param_id]->count(), local_rate,
                        this->update_[param_id]->gpu_data(), Dtype(0),
                        net_params[param_id]->mutable_gpu_diff());
#endif  // USE_CUDA
      } else {
#ifdef USE_GREENTEA
        // compute square of gradient in update
        greentea_gpu_powx<Dtype>(
            this->device_->id(), net_params[param_id]->count(),
            (cl_mem) (net_params[param_id]->gpu_diff()), 0, Dtype(2),
            (cl_mem) (this->update_[param_id]->mutable_gpu_data()), 0);

        // update history
        greentea_gpu_add<Dtype>(
            this->device_->id(), net_params[param_id]->count(),
            (cl_mem) (this->update_[param_id]->gpu_data()), 0,
            (cl_mem) (this->history_[param_id]->gpu_data()), 0,
            (cl_mem) (this->history_[param_id]->mutable_gpu_data()), 0);

        // prepare update
        greentea_gpu_powx<Dtype>(
            this->device_->id(), net_params[param_id]->count(),
            (cl_mem) (this->history_[param_id]->gpu_data()), 0, Dtype(0.5),
            (cl_mem) (this->update_[param_id]->mutable_gpu_data()), 0);

        greentea_gpu_add_scalar<Dtype>(
            this->device_->id(), net_params[param_id]->count(), delta,
            (cl_mem) (this->update_[param_id]->mutable_gpu_data()), 0);

        greentea_gpu_div<Dtype>(
            this->device_->id(), net_params[param_id]->count(),
            (cl_mem) (net_params[param_id]->gpu_diff()), 0,
            (cl_mem) (this->update_[param_id]->gpu_data()), 0,
            (cl_mem) (this->update_[param_id]->mutable_gpu_data()), 0);

        // scale and copy
        greentea_gpu_axpby<Dtype>(
            this->device_->id(), net_params[param_id]->count(),
            local_rate, (cl_mem) (this->update_[param_id]->gpu_data()), 0,
            Dtype(0), (cl_mem) (net_params[param_id]->mutable_gpu_diff()), 0);
#endif  // USE_GREENTEA
      }
#else
      NO_GPU;
#endif
      break;
    }
    default:
      LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
  }
}
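
As a usage note: this per-parameter variant assumes the diff already contains the normalized, regularized gradient. A hypothetical sketch of the driving loop, based on the stock SGDSolver ApplyUpdate flow (exact names may differ between Caffe branches):

// Hypothetical sketch, not part of this snippet.
template <typename Dtype>
void SGDSolver<Dtype>::ApplyUpdate() {
  Dtype rate = GetLearningRate();
  ClipGradients();
  for (uint_tp param_id = 0;
       param_id < this->net_->learnable_params().size(); ++param_id) {
    Normalize(param_id);                 // scale the diff by 1 / iter_size
    Regularize(param_id);                // add L1/L2 weight decay to the diff
    ComputeUpdateValue(param_id, rate);  // e.g. the AdaGrad rule above
  }
  this->net_->Update();                  // data -= diff
}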
Example #30
template <typename Dtype>
void EltwiseLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  const int* mask = NULL;
  const int count = top[0]->count();
  const Dtype* top_data = top[0]->cpu_data();
  const Dtype* top_diff = top[0]->cpu_diff();
  if (broadcast_) {
    bool broadcasted[2];
    int i, j;
    broadcasted[0] = broadcasted[1] = false;
    for (int k = 0; k < 4; ++k) {
      if (bottom[0]->shape()[k] > bottom[1]->shape()[k]) broadcasted[1] = true;
      if (bottom[0]->shape()[k] < bottom[1]->shape()[k]) broadcasted[0] = true;
    }

    i = 0; j = 1;  // i -> not broadcasted, j -> broadcasted
    if (broadcasted[0]) { i = 1; j = 0; }

    int dima[4], dimb[4];
    const Dtype* bot_data = bottom[i]->cpu_data();
    const Dtype* bot_data_brd = bottom[j]->cpu_data();
    Dtype*       bot_diff = bottom[i]->mutable_cpu_diff();
    Dtype*       bot_diff_brd = bottom[j]->mutable_cpu_diff();

    for (int n=0; n<4; n++) dima[n] = bottom[i]->shape()[n];
    for (int n=0; n<4; n++) dimb[n] = bottom[j]->shape()[n];

    switch(op_)
    {
    case EltwiseParameter_EltwiseOp_PROD:
      if (propagate_down[j]) {
        int n = 1;
        for (int x=0; x<4; x++) n *= dima[x];
        caffe_mul<Dtype>(n, top_diff, bot_data, bot_diff);
        caffe_sum_reduce<Dtype>(dima, dimb, bot_diff, bot_diff_brd);
        caffe_set(n, Dtype(0), bot_diff);
      }
      
      if (propagate_down[i])
        caffe_mul_broadcast<Dtype>(dima, dimb, top_diff, bot_data_brd, bot_diff);

      break;
    case EltwiseParameter_EltwiseOp_SUM:
      if (propagate_down[j]) 
        caffe_sum_reduce<Dtype>(dima, dimb, top_diff, bot_diff_brd);

      if (propagate_down[i]) {
        int n = 1;
        for (int x=0; x<4; x++) n *= dima[x];
        caffe_copy<Dtype>(n, top_diff, bot_diff);
      }
      break;
    default:
      LOG(FATAL) << "Unknown elementwise operation.";
    }
  } else {
    for (int i = 0; i < bottom.size(); ++i) {
      if (propagate_down[i]) {
        const Dtype* bottom_data = bottom[i]->cpu_data();
        Dtype* bottom_diff = bottom[i]->mutable_cpu_diff();
        switch (op_) {
        case EltwiseParameter_EltwiseOp_PROD:
          if (stable_prod_grad_) {
            bool initialized = false;
            for (int j = 0; j < bottom.size(); ++j) {
              if (i == j) { continue; }
              if (!initialized) {
                caffe_copy(count, bottom[j]->cpu_data(), bottom_diff);
                initialized = true;
              } else {
                caffe_mul(count, bottom[j]->cpu_data(), bottom_diff,
                          bottom_diff);
              }
            }
          } else {
            caffe_div(count, top_data, bottom_data, bottom_diff);
          }
          caffe_mul(count, bottom_diff, top_diff, bottom_diff);
          break;
        case EltwiseParameter_EltwiseOp_SUM:
          if (coeffs_[i] == Dtype(1)) {
            caffe_copy(count, top_diff, bottom_diff);
          } else {
            caffe_cpu_scale(count, coeffs_[i], top_diff, bottom_diff);
          }
          break;
        case EltwiseParameter_EltwiseOp_MAX:
          mask = max_idx_.cpu_data();
          for (int index = 0; index < count; ++index) {
            Dtype gradient = 0;
            if (mask[index] == i) {
              gradient += top_diff[index];
            }
            bottom_diff[index] = gradient;
          }
          break;
        default:
          LOG(FATAL) << "Unknown elementwise operation.";
        }
      }
    }
  }
}
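
For the non-broadcast PROD case above, the gradient being formed is (a sketch)

\[
\frac{\partial L}{\partial x_i}
= \frac{\partial L}{\partial y}\,\prod_{j \ne i} x_j
= \frac{\partial L}{\partial y}\cdot\frac{y}{x_i},
\]

where the stable_prod_grad_ path multiplies the other bottoms together explicitly, while the fast path reuses y / x_i and therefore misbehaves when an input is exactly zero.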