void GAB::LearnGAB(DataSet& pos, DataSet& neg){ const Options& opt = Options::GetInstance(); timeval start, end; timeval Tstart, Tend; float time = 0; int nPos = pos.size; int nNeg = neg.size; float _FAR=1.0; int nFea=0; float aveEval=0; float *w = new float[nPos]; if(stages!=0){ int fail = 0; #pragma omp parallel for for (int i = 0; i < nPos; i++) { float score = 0; if(NPDClassify(pos.imgs[i].clone(),score)){ pos.Fx[i]=score; } else{ fail ++; } } if(fail!=0){ printf("you should't change pos data! %d \n",fail); return; } MiningNeg(nPos,neg); if(neg.imgs.size()<pos.imgs.size()){ printf("neg not enough, change neg rate or add neg Imgs %d %d\n",pos.imgs.size(),neg.imgs.size()); return; } pos.CalcWeight(1,opt.maxWeight); neg.CalcWeight(-1,opt.maxWeight); } Mat faceFea = pos.ExtractPixel(); pos.ImgClear(); printf("Extract pos feature finish\n"); Mat nonfaceFea = neg.ExtractPixel(); printf("Extract neg feature finish\n"); for (int t = stages;t<opt.maxNumWeaks;t++){ printf("start training %d stages \n",t); gettimeofday(&start,NULL); vector<int> posIndex; vector<int> negIndex; for(int i=0; i<nPos; i++) posIndex.push_back(i); for(int i=0; i<nNeg; i++) negIndex.push_back(i); //trim weight memcpy(w,pos.W,nPos*sizeof(float)); std::sort(&w[0],&w[nPos]); int k; float wsum; for(int i =0;i<nPos;i++){ wsum += w[i]; if (wsum>=opt.trimFrac){ k = i; break; } } vector< int >::iterator iter; for(iter = posIndex.begin();iter!=posIndex.end();){ if(pos.W[*iter]<w[k]) iter = posIndex.erase(iter); else ++iter; } wsum = 0; memcpy(w,neg.W,nNeg*sizeof(float)); std::sort(&w[0],&w[nNeg]); for(int i =0;i<nNeg;i++){ wsum += w[i]; if (wsum>=opt.trimFrac){ k = i; break; } } for(iter = negIndex.begin();iter!=negIndex.end();){ if(neg.W[*iter]<w[k]) iter = negIndex.erase(iter); else ++iter; } int nPosSam = posIndex.size(); int nNegSam = negIndex.size(); int minLeaf_t = max( round((nPosSam+nNegSam)*opt.minLeafFrac),opt.minLeaf); vector<int> feaId, leftChild, rightChild; vector< vector<unsigned char> > 
cutpoint; vector<float> fit; printf("Iter %d: nPos=%d, nNeg=%d, ", t, nPosSam, nNegSam); DQT dqt; gettimeofday(&Tstart,NULL); float mincost = dqt.Learn(faceFea,nonfaceFea,pos.W,neg.W,posIndex,negIndex,minLeaf_t,feaId,leftChild,rightChild,cutpoint,fit); gettimeofday(&Tend,NULL); float DQTtime = (Tend.tv_sec - Tstart.tv_sec); printf("DQT time:%.3fs\n",DQTtime); if (feaId.empty()){ printf("\n\nNo available features to satisfy the split. The AdaBoost learning terminates.\n"); break; } Mat posX(feaId.size(),faceFea.cols,CV_8UC1); for(int i = 0;i<feaId.size();i++) for(int j = 0;j<faceFea.cols;j++){ int x,y; GetPoints(feaId[i],&x,&y); unsigned char Fea = ppNpdTable.at<uchar>(faceFea.at<uchar>(x,j),faceFea.at<uchar>(y,j)); posX.at<uchar>(i,j) = Fea; } Mat negX(feaId.size(),nonfaceFea.cols,CV_8UC1); for(int i = 0;i<feaId.size();i++) for(int j = 0;j<nonfaceFea.cols;j++){ int x,y; GetPoints(feaId[i],&x,&y); unsigned char Fea = ppNpdTable.at<uchar>(nonfaceFea.at<uchar>(x,j),nonfaceFea.at<uchar>(y,j)); negX.at<uchar>(i,j) = Fea; } TestDQT(pos.Fx,fit,cutpoint,leftChild,rightChild,posX); TestDQT(neg.Fx,fit,cutpoint,leftChild,rightChild,negX); vector<int> negPassIndex; for(int i=0; i<nNegSam; i++) negPassIndex.push_back(i); memcpy(w,pos.Fx,nPos*sizeof(float)); sort(w,w+nPos); int index = max(floor(nPos*(1-opt.minDR)),0); float threshold = w[index]; for(iter = negPassIndex.begin(); iter != negPassIndex.end();){ if(neg.Fx[*iter] < threshold) iter = negPassIndex.erase(iter); else iter++; } float far = float(negPassIndex.size())/float(nNeg); int depth = CalcTreeDepth(leftChild,rightChild); if(t==1) aveEval+=depth; else aveEval+=depth*_FAR; _FAR *=far; nFea = nFea + feaId.size(); gettimeofday(&end,NULL); time += (end.tv_sec - start.tv_sec); int nNegPass = negPassIndex.size(); printf("FAR(t)=%.2f%%, FAR=%.2g, depth=%d, nFea(t)=%d, nFea=%d, cost=%.3f.\n",far*100.,_FAR,depth,feaId.size(),nFea,mincost); printf("\t\tnNegPass=%d, aveEval=%.3f, time=%.3fs, meanT=%.3fs.\n", nNegPass, aveEval, 
time, time/(stages+1)); if(_FAR<=opt.maxFAR){ printf("\n\nThe training is converged at iteration %d. FAR = %.2f%%\n", t, _FAR * 100); break; } SaveIter(feaId,leftChild,rightChild,cutpoint,fit,threshold); gettimeofday(&Tstart,NULL); neg.Remove(negPassIndex); MiningNeg(nPos,neg); nonfaceFea = neg.ExtractPixel(); pos.CalcWeight(1,opt.maxWeight); neg.CalcWeight(-1,opt.maxWeight); gettimeofday(&Tend,NULL); float Ttime = (Tend.tv_sec - Tstart.tv_sec); printf("neg mining time:%.3fs\n",Ttime); if(!(stages%opt.saveStep)){ Save(); printf("save the model\n"); } } delete []w; }
// Train one stage of the joint cascade: boost K carts (Real Boost, with
// negative mining and optional cart restarts), then fit a global linear
// regression over the LBF features to refine the current shapes.
//
// pos / neg are mutated in place: scores and boosting weights are updated
// per cart, samples falling below each cart's threshold are removed, and
// current_shapes are advanced by the global regression at the end.
void BoostCart::Train(DataSet& pos, DataSet& neg) {
  Config& c = Config::GetInstance();
  JoinCascador& joincascador = *c.joincascador;
  // statistic parameters
  const int pos_original_size = pos.size;
  const int neg_original_size = int(pos_original_size * c.nps[stage]);
  int neg_rejected = 0; // running count of negatives this stage rejected
  const int landmark_n = c.landmark_n;
  // score normalization is applied once every `normalization_step` carts
  // NOTE(review): if this product is 0, the `kk % normalization_step`
  // tests below are modulo-by-zero (UB) — confirm config keeps it positive
  const int normalization_step = landmark_n*c.score_normalization_steps[stage];
  RNG& rng = c.rng_pool[0]; // NOTE(review): unused in this function — confirm before removing
  //int drop_n = (1. - c.recall[stage])*pos.size / K; // pos drop number per cart
  //if (drop_n <= 1) drop_n = 1;
  int drop_n = c.drops[stage]; // number of positives dropped per cart
  // resume right after the last trained cart (supports snapshot restore)
  const int start_of_cart = joincascador.current_cart_idx + 1;
  int restarts = 0;            // restart attempts for the current cart slot
  double best_drop_rate = 0.;  // best neg drop rate seen across restarts
  Cart best_cart = carts[0];   // cart achieving best_drop_rate
  // Real Boost
  // if neg.size < neg_th, mining starts
  int current_stage_idx = c.joincascador->current_stage_idx;
  int neg_th = int(pos.size*c.nps[current_stage_idx] * c.mining_th[current_stage_idx]);
  for (int k = start_of_cart; k < K; k++) {
    const int kk = k + 1; // 1-based cart index
    Cart& cart = carts[k];
    // mine more negatives when the pool runs low
    if (neg.size < neg_th) {
      neg.MoreNegSamples(pos.size, c.nps[stage]);
      neg_th = int(neg.size * c.mining_th[current_stage_idx]); // update neg_th
    }
    // print out data set status (QSort orders samples by score, descending
    // per the max/min log below)
    pos.QSort();
    neg.QSort();
    LOG("Pos max score = %.4lf, min score = %.4lf", pos.scores[0], pos.scores[pos.size - 1]);
    LOG("Neg max score = %.4lf, min score = %.4lf", neg.scores[0], neg.scores[neg.size - 1]);
    // draw scores density graph
    draw_density_graph(pos.scores, neg.scores);
    // update weights
    DataSet::UpdateWeights(pos, neg);
    LOG("Current Positive DataSet Size is %d", pos.size);
    LOG("Current Negative DataSet Size is %d", neg.size);
    // train cart
    TIMER_BEGIN
      LOG("Train %d th Cart", k + 1);
      cart.Train(pos, neg);
      LOG("Done with %d th Cart, costs %.4lf s", k + 1, TIMER_NOW);
    TIMER_END
    joincascador.current_cart_idx = k;
    // update score and last_score
    pos.UpdateScores(cart);
    neg.UpdateScores(cart);
    // periodically re-normalize scores; carts in between carry the identity
    // transform (mean 0, std 1)
    if (kk % normalization_step == 0) {
      DataSet::CalcMeanAndStd(pos, neg, cart.mean, cart.std);
      pos.ApplyMeanAndStd(cart.mean, cart.std);
      neg.ApplyMeanAndStd(cart.mean, cart.std);
    }
    else {
      cart.mean = 0.;
      cart.std = 1.;
    }
    // select th for pre-defined recall
    pos.QSort();
    neg.QSort();
    cart.th = pos.CalcThresholdByNumber(drop_n);
    int pos_n = pos.size;
    int neg_n = neg.size;
    // how many negatives WOULD be removed at this threshold (dry run)
    int will_removed = neg.PreRemove(cart.th);
    double tmp_drop_rate = double(will_removed) / neg_n;
    int number_of_carts = joincascador.current_stage_idx*joincascador.K + joincascador.current_cart_idx;
    // Restart path: if this cart rejects too few negatives, retrain the same
    // slot (up to c.restart_times), remembering the best attempt so far.
    if (c.restart_on && tmp_drop_rate < c.restart_th[joincascador.current_stage_idx] && number_of_carts > 10) {
      restarts++;
      LOG("***** Drop %d, Drop rate neg is %.4lf%%, Restart current Cart *****", will_removed, tmp_drop_rate*100.);
      LOG("***** Restart Time: %d *****", restarts);
      LOG("Current trained Cart below");
      cart.PrintSelf();
      // compare with best cart for now
      if (tmp_drop_rate > best_drop_rate) {
        best_drop_rate = tmp_drop_rate;
        best_cart = cart;
      }
      // select the best cart for this cart
      if (restarts >= c.restart_times) {
        LOG("***** Select a cart which give us %.4lf%% drop rate *****", best_drop_rate*100.);
        cart = best_cart;
        best_drop_rate = 0.;
        // re-apply the chosen cart's scores from a clean slate
        pos.ResetScores();
        neg.ResetScores();
        pos.UpdateScores(cart);
        neg.UpdateScores(cart);
        if (kk % normalization_step == 0) {
          DataSet::CalcMeanAndStd(pos, neg, cart.mean, cart.std);
          pos.ApplyMeanAndStd(cart.mean, cart.std);
          neg.ApplyMeanAndStd(cart.mean, cart.std);
        }
        else {
          cart.mean = 0.;
          cart.std = 1.;
        }
        pos.QSort();
        neg.QSort();
        //JDA_Assert(cart.th == pos.CalcThresholdByNumber(1), "restart error");
      }
      else {
        // recover data scores and redo this cart slot
        pos.ResetScores();
        neg.ResetScores();
        k--;
        continue;
      }
    }
    // update restart parameters
    best_drop_rate = 0.;
    restarts = 0;
    // permanently remove samples scoring below the cart's threshold
    pos.Remove(cart.th);
    neg.Remove(cart.th);
    // print cart info
    cart.PrintSelf();
    if ((kk != K) && (kk%c.snapshot_iter == 0)) {
      // snapshot model and data
      DataSet::Snapshot(pos, neg);
      c.joincascador->Snapshot();
    }
    int pos_drop = pos_n - pos.size;
    int neg_drop = neg_n - neg.size;
    // NOTE(review): pos_drop_rate is computed but never logged or used
    double pos_drop_rate = double(pos_drop) / double(pos_n)* 100.;
    double neg_drop_rate = double(neg_drop) / double(neg_n)* 100.;
    LOG("Pos drop = %d, Neg drop = %d, drop rate = %.2lf%%", pos_drop, neg_drop, neg_drop_rate);
    neg_rejected += neg_n - neg.size;
  }
  // Global Regression with LBF
  // generate lbf (local binary features) for every remaining sample
  const int pos_n = pos.size;
  const int neg_n = neg.size;
  LOG("Generate LBF of DataSet");
  vector<Mat_<int> > pos_lbf(pos_n);
  vector<Mat_<int> > neg_lbf(neg_n);
  #pragma omp parallel for
  for (int i = 0; i < pos_n; i++) {
    pos_lbf[i] = GenLBF(pos.imgs[i], pos.current_shapes[i]);
  }
  #pragma omp parallel for
  for (int i = 0; i < neg_n; i++) {
    neg_lbf[i] = GenLBF(neg.imgs[i], neg.current_shapes[i]);
  }
  // regression, use valid face which has gt_shape
  vector<int> valid_pos_idx;
  vector<Mat_<int> > valid_pos_lbf;
  for (int i = 0; i < pos.size; i++) {
    if (pos.HasGtShape(i)) {
      valid_pos_idx.push_back(i);
      valid_pos_lbf.push_back(pos_lbf[i]);
    }
  }
  Mat_<double> shape_residual_valid = pos.CalcShapeResidual(valid_pos_idx);
  LOG("Start Global Regression");
  GlobalRegression(valid_pos_lbf, shape_residual_valid);
  // update shapes (negatives too, so the cascade stays consistent for them)
  #pragma omp parallel for
  for (int i = 0; i < pos_n; i++) {
    pos.current_shapes[i] += GenDeltaShape(pos_lbf[i], pos.stp_mc[i]);
  }
  #pragma omp parallel for
  for (int i = 0; i < neg_n; i++) {
    neg.current_shapes[i] += GenDeltaShape(neg_lbf[i], neg.stp_mc[i]);
  }
  // summary
  LOG("====================");
  LOG("| Summary |");
  LOG("====================");
  // regression error over positives that carry a ground-truth shape
  vector<Mat_<double> > valid_gt_shapes;
  vector<Mat_<double> > valid_current_shapes;
  for (int i = 0; i < pos.size; i++) {
    if (pos.HasGtShape(i)) {
      valid_gt_shapes.push_back(pos.gt_shapes[i]);
      valid_current_shapes.push_back(pos.current_shapes[i]);
    }
  }
  double e = calcMeanError(valid_gt_shapes, valid_current_shapes);
  LOG("Regression Mean Error = %.4lf", e);
  // accept and reject rate
  // NOTE(review): reject_rate is computed but never logged — confirm intent
  double accept_rate = 0.;
  double reject_rate = 0.;
  accept_rate = double(pos_n) / double(pos_original_size) * 100.;
  reject_rate = double(neg_rejected) / double(neg_rejected + neg_original_size) * 100.;
  LOG("Accept Rate = %.2lf%%", accept_rate);
  // Done
}