Example #1
0
void BoostCart::Train(DataSet& pos, DataSet& neg) {
  Config& c = Config::GetInstance();
  JoinCascador& joincascador = *c.joincascador;

  // statistic parameters
  const int pos_original_size = pos.size;
  const int neg_original_size = int(pos_original_size * c.nps[stage]);
  int neg_rejected = 0;

  const int landmark_n = c.landmark_n;
  const int normalization_step = landmark_n*c.score_normalization_steps[stage];
  RNG& rng = c.rng_pool[0];
  //int drop_n = (1. - c.recall[stage])*pos.size / K; // pos drop number per cart
  //if (drop_n <= 1) drop_n = 1;
  int drop_n = c.drops[stage];

  const int start_of_cart = joincascador.current_cart_idx + 1;
  int restarts = 0;
  double best_drop_rate = 0.;
  Cart best_cart = carts[0];

  // Real Boost

  // if neg.size < neg_th, mining starts
  int current_stage_idx = c.joincascador->current_stage_idx;
  int neg_th = int(pos.size*c.nps[current_stage_idx] * c.mining_th[current_stage_idx]);
  for (int k = start_of_cart; k < K; k++) {
    const int kk = k + 1;
    Cart& cart = carts[k];
    if (neg.size < neg_th) {
      neg.MoreNegSamples(pos.size, c.nps[stage]);
      neg_th = int(neg.size * c.mining_th[current_stage_idx]); // update neg_th
    }
    // print out data set status
    pos.QSort(); neg.QSort();
    LOG("Pos max score = %.4lf, min score = %.4lf", pos.scores[0], pos.scores[pos.size - 1]);
    LOG("Neg max score = %.4lf, min score = %.4lf", neg.scores[0], neg.scores[neg.size - 1]);
    // draw scores desity graph
    draw_density_graph(pos.scores, neg.scores);
    // update weights
    DataSet::UpdateWeights(pos, neg);
    LOG("Current Positive DataSet Size is %d", pos.size);
    LOG("Current Negative DataSet Size is %d", neg.size);
    // train cart
    TIMER_BEGIN
      LOG("Train %d th Cart", k + 1);
      cart.Train(pos, neg);
      LOG("Done with %d th Cart, costs %.4lf s", k + 1, TIMER_NOW);
    TIMER_END
    joincascador.current_cart_idx = k;
    // update score and last_score
    pos.UpdateScores(cart);
    neg.UpdateScores(cart);
    if (kk % normalization_step == 0) {
      DataSet::CalcMeanAndStd(pos, neg, cart.mean, cart.std);
      pos.ApplyMeanAndStd(cart.mean, cart.std);
      neg.ApplyMeanAndStd(cart.mean, cart.std);
    }
    else {
      cart.mean = 0.;
      cart.std = 1.;
    }
    // select th for pre-defined recall
    pos.QSort();
    neg.QSort();
    cart.th = pos.CalcThresholdByNumber(drop_n);
    int pos_n = pos.size;
    int neg_n = neg.size;
    int will_removed = neg.PreRemove(cart.th);
    double tmp_drop_rate = double(will_removed) / neg_n;
    int number_of_carts = joincascador.current_stage_idx*joincascador.K + joincascador.current_cart_idx;
    if (c.restart_on && tmp_drop_rate < c.restart_th[joincascador.current_stage_idx] && number_of_carts > 10) {
      restarts++;
      LOG("***** Drop %d, Drop rate neg is %.4lf%%, Restart current Cart *****", will_removed, tmp_drop_rate*100.);
      LOG("***** Restart Time: %d *****", restarts);
      LOG("Current trained Cart below");
      cart.PrintSelf();

      // compare with best cart for now
      if (tmp_drop_rate > best_drop_rate) {
        best_drop_rate = tmp_drop_rate;
        best_cart = cart;
      }
      // select the best cart for this cart
      if (restarts >= c.restart_times) {
        LOG("***** Select a cart which give us %.4lf%% drop rate *****", best_drop_rate*100.);
        cart = best_cart;
        best_drop_rate = 0.;
        pos.ResetScores();
        neg.ResetScores();
        pos.UpdateScores(cart);
        neg.UpdateScores(cart);
        if (kk % normalization_step == 0) {
          DataSet::CalcMeanAndStd(pos, neg, cart.mean, cart.std);
          pos.ApplyMeanAndStd(cart.mean, cart.std);
          neg.ApplyMeanAndStd(cart.mean, cart.std);
        }
        else {
          cart.mean = 0.;
          cart.std = 1.;
        }
        pos.QSort();
        neg.QSort();
        //JDA_Assert(cart.th == pos.CalcThresholdByNumber(1), "restart error");
      }
      else {
        // recover data scores
        pos.ResetScores();
        neg.ResetScores();
        k--;
        continue;
      }
    }

    // update restart parameters
    best_drop_rate = 0.;
    restarts = 0;

    pos.Remove(cart.th);
    neg.Remove(cart.th);

    // print cart info
    cart.PrintSelf();
    if ((kk != K) && (kk%c.snapshot_iter == 0)) { // snapshot model and data
      DataSet::Snapshot(pos, neg);
      c.joincascador->Snapshot();
    }

    int pos_drop = pos_n - pos.size;
    int neg_drop = neg_n - neg.size;
    double pos_drop_rate = double(pos_drop) / double(pos_n)* 100.;
    double neg_drop_rate = double(neg_drop) / double(neg_n)* 100.;
    LOG("Pos drop = %d, Neg drop = %d, drop rate = %.2lf%%", pos_drop, neg_drop, neg_drop_rate);
    neg_rejected += neg_n - neg.size;
  }
  // Global Regression with LBF
  // generate lbf
  const int pos_n = pos.size;
  const int neg_n = neg.size;
  LOG("Generate LBF of DataSet");
  vector<Mat_<int> > pos_lbf(pos_n);
  vector<Mat_<int> > neg_lbf(neg_n);

  #pragma omp parallel for
  for (int i = 0; i < pos_n; i++) {
    pos_lbf[i] = GenLBF(pos.imgs[i], pos.current_shapes[i]);
  }
  #pragma omp parallel for
  for (int i = 0; i < neg_n; i++) {
    neg_lbf[i] = GenLBF(neg.imgs[i], neg.current_shapes[i]);
  }

  // regression, use valid face which has gt_shape
  vector<int> valid_pos_idx;
  vector<Mat_<int> > valid_pos_lbf;
  for (int i = 0; i < pos.size; i++) {
    if (pos.HasGtShape(i)) {
      valid_pos_idx.push_back(i);
      valid_pos_lbf.push_back(pos_lbf[i]);
    }
  }
  Mat_<double> shape_residual_valid = pos.CalcShapeResidual(valid_pos_idx);
  LOG("Start Global Regression");
  GlobalRegression(valid_pos_lbf, shape_residual_valid);
  // update shapes
  #pragma omp parallel for
  for (int i = 0; i < pos_n; i++) {
    pos.current_shapes[i] += GenDeltaShape(pos_lbf[i], pos.stp_mc[i]);
  }
  #pragma omp parallel for
  for (int i = 0; i < neg_n; i++) {
    neg.current_shapes[i] += GenDeltaShape(neg_lbf[i], neg.stp_mc[i]);
  }

  // summary
  LOG("====================");
  LOG("|      Summary     |");
  LOG("====================");
  // regression error
  vector<Mat_<double> > valid_gt_shapes;
  vector<Mat_<double> > valid_current_shapes;
  for (int i = 0; i < pos.size; i++) {
    if (pos.HasGtShape(i)) {
      valid_gt_shapes.push_back(pos.gt_shapes[i]);
      valid_current_shapes.push_back(pos.current_shapes[i]);
    }
  }
  double e = calcMeanError(valid_gt_shapes, valid_current_shapes);
  LOG("Regression Mean Error = %.4lf", e);

  // accept and reject rate
  double accept_rate = 0.;
  double reject_rate = 0.;
  accept_rate = double(pos_n) / double(pos_original_size) * 100.;
  reject_rate = double(neg_rejected) / double(neg_rejected + neg_original_size) * 100.;
  LOG("Accept Rate = %.2lf%%", accept_rate);
  // Done
}