Example #1
template <typename Dtype>
void GradientChecker<Dtype>::CheckGradientSingle(Layer<Dtype>* layer,
    vector<Blob<Dtype>*>* bottom, vector<Blob<Dtype>*>* top,
    int check_bottom, int top_id, int top_data_id, bool element_wise) {
  if (element_wise) {
    CHECK_EQ(0, layer->blobs().size());
    CHECK_LE(0, top_id);
    CHECK_LE(0, top_data_id);
    const int top_count = (*top)[top_id]->count();
    for (int blob_id = 0; blob_id < bottom->size(); ++blob_id) {
      CHECK_EQ(top_count, (*bottom)[blob_id]->count());
    }
  }
  // First, figure out what blobs we need to check against.
  vector<Blob<Dtype>*> blobs_to_check;
  for (int i = 0; i < layer->blobs().size(); ++i) {
    blobs_to_check.push_back(layer->blobs()[i].get());
  }
  if (check_bottom < 0) {
    for (int i = 0; i < bottom->size(); ++i) {
      blobs_to_check.push_back((*bottom)[i]);
    }
  } else {
    CHECK(check_bottom < bottom->size());
    blobs_to_check.push_back((*bottom)[check_bottom]);
  }
  // Compute the gradient analytically using Backward
  Caffe::set_random_seed(seed_);
  // Get any loss from the layer
  Dtype computed_objective = layer->Forward(*bottom, top);
  // Get additional loss from the objective
  computed_objective += GetObjAndGradient(top, top_id, top_data_id);
  layer->Backward(*top, true, bottom);
  // Store computed gradients for all checked blobs
  vector<shared_ptr<Blob<Dtype> > >
      computed_gradient_blobs(blobs_to_check.size());
  for (int blob_id = 0; blob_id < blobs_to_check.size(); ++blob_id) {
    Blob<Dtype>* current_blob = blobs_to_check[blob_id];
    computed_gradient_blobs[blob_id].reset(new Blob<Dtype>());
    computed_gradient_blobs[blob_id]->ReshapeLike(*current_blob);
    const int count = blobs_to_check[blob_id]->count();
    const Dtype* diff = blobs_to_check[blob_id]->cpu_diff();
    Dtype* computed_gradients =
        computed_gradient_blobs[blob_id]->mutable_cpu_data();
    caffe_copy(count, diff, computed_gradients);
  }
  // Compute derivative of top w.r.t. each bottom and parameter input using
  // finite differencing.
  // LOG(ERROR) << "Checking " << blobs_to_check.size() << " blobs.";
  for (int blob_id = 0; blob_id < blobs_to_check.size(); ++blob_id) {
    Blob<Dtype>* current_blob = blobs_to_check[blob_id];
    const Dtype* computed_gradients =
        computed_gradient_blobs[blob_id]->cpu_data();
    // LOG(ERROR) << "Blob " << blob_id << ": checking "
    //     << current_blob->count() << " parameters.";
    for (int feat_id = 0; feat_id < current_blob->count(); ++feat_id) {
      // For an element-wise layer, we only need to do finite differencing to
      // compute the derivative of (*top)[top_id][top_data_id] w.r.t.
      // (*bottom)[blob_id][i] only for i == top_data_id.  For any other
      // i != top_data_id, we know the derivative is 0 by definition, and simply
      // check that that's true.
      Dtype estimated_gradient = 0;
      if (!element_wise || (feat_id == top_data_id)) {
        // Do finite differencing.
        // Compute loss with stepsize_ added to input.
        current_blob->mutable_cpu_data()[feat_id] += stepsize_;
        Caffe::set_random_seed(seed_);
        Dtype positive_objective = layer->Forward(*bottom, top);
        positive_objective += GetObjAndGradient(top, top_id, top_data_id);
        // Compute loss with stepsize_ subtracted from input.
        current_blob->mutable_cpu_data()[feat_id] -= stepsize_ * 2;
        Caffe::set_random_seed(seed_);
        Dtype negative_objective = layer->Forward(*bottom, top);
        negative_objective += GetObjAndGradient(top, top_id, top_data_id);
        // Recover original input value.
        current_blob->mutable_cpu_data()[feat_id] += stepsize_;
        estimated_gradient = (positive_objective - negative_objective) /
            stepsize_ / 2.;
      }
      Dtype computed_gradient = computed_gradients[feat_id];
      Dtype feature = current_blob->cpu_data()[feat_id];
      // LOG(ERROR) << "debug: " << current_blob->cpu_data()[feat_id] << " "
      //     << current_blob->cpu_diff()[feat_id];
      if (kink_ - kink_range_ > fabs(feature)
          || fabs(feature) > kink_ + kink_range_) {
        // We check relative accuracy, but for too small values, we threshold
        // the scale factor by 1.
        Dtype scale = max(
            max(fabs(computed_gradient), fabs(estimated_gradient)), 1.);
        EXPECT_NEAR(computed_gradient, estimated_gradient, threshold_ * scale)
          << "debug: (top_id, top_data_id, blob_id, feat_id)="
          << top_id << "," << top_data_id << "," << blob_id << "," << feat_id;
      }
      // LOG(ERROR) << "Feature: " << current_blob->cpu_data()[feat_id];
      // LOG(ERROR) << "computed gradient: " << computed_gradient
      //    << " estimated_gradient: " << estimated_gradient;
    }
  }
}
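The listing above estimates each gradient entry with a symmetric finite difference, (f(x + h) - f(x - h)) / (2h), and compares it against the analytically computed gradient using a relative tolerance whose scale is clamped below at 1, so very small gradients are effectively compared absolutely. The standalone snippet below (not Caffe code; the cubic objective and the constants are illustrative assumptions) reproduces just that numeric scheme:

#include <algorithm>
#include <cmath>
#include <cstdio>

// Standalone illustration of the check performed above: compare an analytic
// derivative against a symmetric finite-difference estimate, using a relative
// tolerance whose scale is clamped at 1 for very small gradients.
int main() {
  const double stepsize = 1e-2;   // plays the role of stepsize_
  const double threshold = 1e-3;  // plays the role of threshold_
  const double x = 0.7;

  // Toy objective f(x) = x^3 with analytic derivative 3x^2.
  auto f = [](double v) { return v * v * v; };
  const double computed_gradient = 3.0 * x * x;
  const double estimated_gradient =
      (f(x + stepsize) - f(x - stepsize)) / stepsize / 2.0;

  const double scale = std::max(
      std::max(std::fabs(computed_gradient), std::fabs(estimated_gradient)),
      1.0);
  const bool pass =
      std::fabs(computed_gradient - estimated_gradient) <= threshold * scale;
  std::printf("computed=%.6f estimated=%.6f pass=%d\n",
              computed_gradient, estimated_gradient, pass ? 1 : 0);
  return 0;
}

The symmetric difference is used because its truncation error is O(stepsize^2), much smaller than the O(stepsize) error of a one-sided difference.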
Example #2
template <typename Dtype>
void GradientChecker<Dtype>::CheckGradientSingle(
    Layer<Dtype>* layer, const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top, int_tp check_bottom, int_tp top_id,
    int_tp top_data_id,
    bool element_wise) {
  if (element_wise) {
    CHECK_EQ(0, layer->blobs().size());
    CHECK_LE(0, top_id);
    CHECK_LE(0, top_data_id);
    const int_tp top_count = top[top_id]->count();
    for (int_tp blob_id = 0; blob_id < bottom.size(); ++blob_id) {
      CHECK_EQ(top_count, bottom[blob_id]->count());
    }
  }
  // First, figure out what blobs we need to check against, and zero init
  // parameter blobs.
  vector<Blob<Dtype>*> blobs_to_check;
  vector<bool> propagate_down(bottom.size(), check_bottom == -1);
  for (int_tp i = 0; i < layer->blobs().size(); ++i) {
    Blob<Dtype>* blob = layer->blobs()[i].get();
    caffe_set(blob->count(), static_cast<Dtype>(0), blob->mutable_cpu_diff());
    blobs_to_check.push_back(blob);
  }
  if (check_bottom == -1) {
    for (int_tp i = 0; i < bottom.size(); ++i) {
      blobs_to_check.push_back(bottom[i]);
    }
  } else if (check_bottom >= 0) {
    CHECK_LT(check_bottom, bottom.size());
    blobs_to_check.push_back(bottom[check_bottom]);
    propagate_down[check_bottom] = true;
  }
  CHECK_GT(blobs_to_check.size(), 0)<< "No blobs to check.";
  // Compute the gradient analytically using Backward
  Caffe::set_random_seed(seed_, Caffe::GetDefaultDevice());
  // Ignore the loss from the layer (it's just the weighted sum of the losses
  // from the top blobs, whose gradients we may want to test individually).
  layer->Forward(bottom, top);
  // Get additional loss from the objective
  GetObjAndGradient(*layer, top, top_id, top_data_id);
  layer->Backward(top, propagate_down, bottom);
  // Store computed gradients for all checked blobs
  vector<shared_ptr<Blob<Dtype> > > computed_gradient_blobs(
      blobs_to_check.size());
  for (int_tp blob_id = 0; blob_id < blobs_to_check.size(); ++blob_id) {
    Blob<Dtype>* current_blob = blobs_to_check[blob_id];
    computed_gradient_blobs[blob_id].reset(new Blob<Dtype>());
    computed_gradient_blobs[blob_id]->ReshapeLike(*current_blob);
    const int_tp count = blobs_to_check[blob_id]->count();
    const Dtype* diff = blobs_to_check[blob_id]->cpu_diff();
    Dtype* computed_gradients =
        computed_gradient_blobs[blob_id]->mutable_cpu_data();
    caffe_cpu_copy(count, diff, computed_gradients);
  }
  // Compute derivative of top w.r.t. each bottom and parameter input using
  // finite differencing.
  // LOG(ERROR) << "Checking " << blobs_to_check.size() << " blobs.";
  for (int_tp blob_id = 0; blob_id < blobs_to_check.size(); ++blob_id) {
    Blob<Dtype>* current_blob = blobs_to_check[blob_id];
    const Dtype* computed_gradients =
        computed_gradient_blobs[blob_id]->cpu_data();
    // LOG(ERROR) << "Blob " << blob_id << ": checking "
    //     << current_blob->count() << " parameters.";
    for (int_tp feat_id = 0; feat_id < current_blob->count(); ++feat_id) {
      // For an element-wise layer, we only need to do finite differencing to
      // compute the derivative of top[top_id][top_data_id] w.r.t.
      // bottom[blob_id][i] only for i == top_data_id.  For any other
      // i != top_data_id, we know the derivative is 0 by definition, and simply
      // check that that's true.
      Dtype estimated_gradient = 0;
      Dtype positive_objective = 0;
      Dtype negative_objective = 0;
      if (!element_wise || (feat_id == top_data_id)) {
        // Do finite differencing.
        // Compute loss with stepsize_ added to input.
        current_blob->mutable_cpu_data()[feat_id] += stepsize_;
        Caffe::set_random_seed(seed_, Caffe::GetDefaultDevice());
        layer->Forward(bottom, top);
        positive_objective = GetObjAndGradient(*layer, top, top_id,
                                               top_data_id);
        // Compute loss with stepsize_ subtracted from input.
        current_blob->mutable_cpu_data()[feat_id] -= stepsize_ * 2;
        Caffe::set_random_seed(seed_, Caffe::GetDefaultDevice());
        layer->Forward(bottom, top);
        negative_objective = GetObjAndGradient(*layer, top, top_id,
                                               top_data_id);
        // Recover original input value.
        current_blob->mutable_cpu_data()[feat_id] += stepsize_;
        estimated_gradient = (positive_objective - negative_objective)
            / stepsize_ / 2.;
      }
      Dtype computed_gradient = computed_gradients[feat_id];
      Dtype feature = current_blob->cpu_data()[feat_id];
      // LOG(ERROR) << "debug: " << current_blob->cpu_data()[feat_id] << " "
      //     << current_blob->cpu_diff()[feat_id];
      if (kink_ - kink_range_ > fabs(feature)
          || fabs(feature) > kink_ + kink_range_) {
        // We check relative accuracy, but for too small values, we threshold
        // the scale factor by 1.
        Dtype scale = std::max<Dtype>(
            std::max(fabs(computed_gradient), fabs(estimated_gradient)),
            Dtype(1.));
        EXPECT_NEAR(computed_gradient, estimated_gradient, threshold_ * scale)
            << "debug: (top_id, top_data_id, blob_id, feat_id)=" << top_id
            << "," << top_data_id << "," << blob_id << "," << feat_id
            << "; feat = " << feature << "; objective+ = " << positive_objective
            << "; objective- = " << negative_objective;
      }
      // LOG(ERROR) << "Feature: " << current_blob->cpu_data()[feat_id];
      // LOG(ERROR) << "computed gradient: " << computed_gradient
      //    << " estimated_gradient: " << estimated_gradient;
    }
  }
}
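Each variant relies on GetObjAndGradient(...) to turn the selected top element into a scalar objective and to seed the corresponding diff before Backward runs; the helper itself is not included in these listings. The following is a minimal sketch of the single-element case (top_id >= 0), written against the int_tp, const-reference signature called directly above; it is an assumption reconstructed from the surrounding comments, not the implementation from either example.

// Sketch only: clear every top diff so Backward sees exactly one seeded
// element, then treat that element's value as the objective. The layer
// argument is unused here; it is kept only to match the call sites above.
template <typename Dtype>
Dtype GradientChecker<Dtype>::GetObjAndGradient(
    const Layer<Dtype>& layer, const vector<Blob<Dtype>*>& top,
    int_tp top_id, int_tp top_data_id) {
  for (int_tp i = 0; i < top.size(); ++i) {
    caffe_set(top[i]->count(), Dtype(0), top[i]->mutable_cpu_diff());
  }
  // d objective / d top[top_id][top_data_id] = 1, written into the diff for
  // the analytic Backward pass; the returned value is the objective itself.
  top[top_id]->mutable_cpu_diff()[top_data_id] = Dtype(1);
  return top[top_id]->cpu_data()[top_data_id];
}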
template <typename Dtype>
void GradientChecker<Dtype>::CheckGradientSingle(Layer<Dtype>* layer,
    vector<Blob<Dtype>*>* bottom, vector<Blob<Dtype>*>* top,
    int check_bottom, int top_id, int top_data_id, bool element_wise) {
  if (element_wise) {
    CHECK_EQ(0, layer->blobs().size());
    CHECK_LE(0, top_id);
    CHECK_LE(0, top_data_id);
    const int top_count = (*top)[top_id]->count();
    for (int blob_id = 0; blob_id < bottom->size(); ++blob_id) {
      CHECK_EQ(top_count, (*bottom)[blob_id]->count());
    }
  }
  // First, figure out what blobs we need to check against.
  vector<Blob<Dtype>*> blobs_to_check;
  vector<bool> propagate_down(bottom->size(), check_bottom < 0);
  for (int i = 0; i < layer->blobs().size(); ++i) {
    blobs_to_check.push_back(layer->blobs()[i].get());
  }
  if (check_bottom < 0) {
    for (int i = 0; i < bottom->size(); ++i) {
      blobs_to_check.push_back((*bottom)[i]);
    }
  } else {
    //printf("TestGradientUtil: setting propogat down to true\n");
    CHECK_LT(check_bottom, bottom->size());
    blobs_to_check.push_back((*bottom)[check_bottom]);
    propagate_down[check_bottom] = true;
  }
  // Compute the gradient analytically using Backward
  Caffe::set_random_seed(seed_);
  // Ignore the loss from the layer (it's just the weighted sum of the losses
  // from the top blobs, whose gradients we may want to test individually).
  layer->Forward(*bottom, top);
  // Get additional loss from the objective
  GetObjAndGradient(*layer, top, top_id, top_data_id);
  layer->Backward(*top, propagate_down, bottom);
  // Store computed gradients for all checked blobs
  vector<shared_ptr<Blob<Dtype> > >
      computed_gradient_blobs(blobs_to_check.size());
  for (int blob_id = 0; blob_id < blobs_to_check.size(); ++blob_id) {
    Blob<Dtype>* current_blob = blobs_to_check[blob_id];
    computed_gradient_blobs[blob_id].reset(new Blob<Dtype>());
    computed_gradient_blobs[blob_id]->ReshapeLike(*current_blob);
    const int count = blobs_to_check[blob_id]->count();
    const Dtype* diff = blobs_to_check[blob_id]->cpu_diff();
    Dtype* computed_gradients =
        computed_gradient_blobs[blob_id]->mutable_cpu_data();
    caffe_copy(count, diff, computed_gradients);
  }
  // Compute derivative of top w.r.t. each bottom and parameter input using
  // finite differencing.
  // LOG(ERROR) << "Checking " << blobs_to_check.size() << " blobs.";
  for (int blob_id = 0; blob_id < blobs_to_check.size(); ++blob_id) {
    Blob<Dtype>* current_blob = blobs_to_check[blob_id];
    const Dtype* computed_gradients =
        computed_gradient_blobs[blob_id]->cpu_data();
    // LOG(ERROR) << "Blob " << blob_id << ": checking "
    //     << current_blob->count() << " parameters.";
    for (int feat_id = 0; feat_id < current_blob->count(); ++feat_id) {
      // For an element-wise layer, we only need to do finite differencing to
      // compute the derivative of (*top)[top_id][top_data_id] w.r.t.
      // (*bottom)[blob_id][i] only for i == top_data_id.  For any other
      // i != top_data_id, we know the derivative is 0 by definition, and simply
      // check that that's true.
      Dtype estimated_gradient = 0;
      Dtype positive_objective = 0;
      Dtype negative_objective = 0;

      if (!element_wise || (feat_id == top_data_id)) {
        // Do finite differencing.
        // Compute loss with stepsize_ added to input.
        current_blob->mutable_cpu_data()[feat_id] += stepsize_;
        Caffe::set_random_seed(seed_);
        layer->Forward(*bottom, top);
        positive_objective =
            GetObjAndGradient(*layer, top, top_id, top_data_id);
       // printf("TestGradientUtil: positive_objective : %f \n", (float) positive_objective );
      
        //printf("TestGradientUtil: stepsize_ : %f \n", (float) stepsize_);

        // Compute loss with stepsize_ subtracted from input.
        current_blob->mutable_cpu_data()[feat_id] -= stepsize_ * 2;
        Caffe::set_random_seed(seed_);
        layer->Forward(*bottom, top);
        negative_objective =
            GetObjAndGradient(*layer, top, top_id, top_data_id);
          //printf("TestGradientUtil: negative_objective : %f \n", (float) negative_objective );
        // Recover original input value.
        current_blob->mutable_cpu_data()[feat_id] += stepsize_;
        estimated_gradient = (positive_objective - negative_objective) /
            stepsize_ / 2.;
      }
      Dtype computed_gradient = computed_gradients[feat_id];
      Dtype feature = current_blob->cpu_data()[feat_id];
      // LOG(ERROR) << "debug: " << current_blob->cpu_data()[feat_id] << " "
      //     << current_blob->cpu_diff()[feat_id];
      if (kink_ - kink_range_ > fabs(feature)
          || fabs(feature) > kink_ + kink_range_) {
        // We check relative accuracy, but for too small values, we threshold
        // the scale factor by 1.
        Dtype scale = std::max(
            std::max(fabs(computed_gradient), fabs(estimated_gradient)), 1.);
        EXPECT_NEAR(computed_gradient, estimated_gradient, threshold_ * scale)
          << "debug: (top_id, top_data_id, blob_id, feat_id)="
          << top_id << "," << top_data_id << "," << blob_id << "," << feat_id
          << "; feat = " << feature
          << "; objective+ = " << positive_objective
          << "; objective- = " << negative_objective;
      }
      // LOG(ERROR) << "Feature: " << current_blob->cpu_data()[feat_id];
      // LOG(ERROR) << "computed gradient: " << computed_gradient
      //    << " estimated_gradient: " << estimated_gradient;
    }
  }
}
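For completeness, here is how a unit test might drive one of these checkers, written against the pointer-style API of Example #1 and the last listing. The layer choice, filler, blob shape, and the GradientChecker(stepsize, threshold) constructor arguments are illustrative assumptions rather than details taken from the examples:

// Hypothetical usage sketch; the helper name, layer type, and constants are
// illustrative, and the SetUp(bottom, &top) call assumes the era-matching
// pointer-style Layer API used in Example #1.
template <typename Dtype>
void RunSingleGradientCheck() {
  Blob<Dtype> bottom_blob(2, 3, 4, 5);
  Blob<Dtype> top_blob;
  FillerParameter filler_param;
  GaussianFiller<Dtype> filler(filler_param);
  filler.Fill(&bottom_blob);

  vector<Blob<Dtype>*> bottom(1, &bottom_blob);
  vector<Blob<Dtype>*> top(1, &top_blob);

  LayerParameter layer_param;
  TanHLayer<Dtype> layer(layer_param);
  layer.SetUp(bottom, &top);

  // stepsize = 1e-2, threshold = 1e-3; seed and kink keep their defaults.
  GradientChecker<Dtype> checker(1e-2, 1e-3);
  // Check d top[0][0] / d bottom[0]: check_bottom = 0, top_id = 0,
  // top_data_id = 0, element_wise = false.
  checker.CheckGradientSingle(&layer, &bottom, &top, 0, 0, 0, false);
}

Passing check_bottom = -1 would append every bottom blob to blobs_to_check, and looping top_id and top_data_id over all top elements (as Caffe's exhaustive checker does) verifies every output in turn.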