/*********************************
 * Function: getStrongClassifier
 * -----------------------------
 * This function generates a strong classifier intended to distinguish
 * positive training examples from negative training examples. Each round it
 * asks get_best_classifier() for a weak classifier, appends it to the result,
 * and re-weights the training data with update_feature_weight().
 *
 * Side effects: overwrites the members `dimensions` and `num_features`,
 * writes the working copy of the training data to `tdFile` every round, and
 * prints a progress line to stdout.
 *
 * trainingData: set of feature vectors; the function works on a local copy
 * num_classifiers: the maximum number of weak classifiers we want to be in
 *      the strong classifier (20 tends to be a good default it seems)
 *
 * returns: a strong classifier -- the weak classifiers in the order they were
 *      selected. It contains fewer than num_classifiers entries when
 *      get_best_classifier() returns a null pointer before the loop finishes.
 */
StrongClassifier AdaBooster::getStrongClassifier(const TrainingData &trainingData, unsigned int num_classifiers){
    // work on a private copy so the caller's data is left alone
    TrainingData td = trainingData;

    // set dimensions and number of features
    dimensions = td.dimensions();
    num_features = td.size();

    // initialize feature weights
    init_feature_weight(td);

    // vector of weak classifiers that make up a strong classifier
    vector<WeakClassifier> strong_classifier;

    // sort circle by features -- store in *sorted*
    create_feature_views(td);

    //td.printData();
    //char garbage[80]; // use this with cin.getline() below

    for (unsigned int i=0; i<num_classifiers; i++){
        // identify best classifier
        WeakClassifier *wc = get_best_classifier();

        // A null result means we're done constructing our strong classifier.
        // Break instead of returning here so that the cleanup of *sorted*
        // below also runs on this path.
        if (!wc)
            break;

        // otherwise, add (a copy of) the best classifier to strong_classifier
        strong_classifier.push_back(*wc);
        //strong_classifier.back().printClassifier();

        // the vector holds its own copy, so the original can be deleted
        delete wc;

        // don't think this is useful anymore, but I'm scared to delete it
        // without more testing, so it stays for now. TODO: delete this?
        td.writeData(tdFile);
        //td.printData();

        // update weights of features
        update_feature_weight(td, strong_classifier.back());

        // print out status update; '\r' rewrites the same console line.
        // %u matches the unsigned counter.
        printf("\rClassifiers Calculated: %u ", i+1);
        fflush(stdout);

        /*
        td.printData(); // prints out the set of training data
        cin.getline(garbage, 80); // if you want to pause printing, use this
        // */
    }

    // delete sorted array
    delete [] sorted;

    // StrongClassifier error reporting
    /*vector< vector<double> > strong_err = getStrongError(td, strong_classifier);
    printStrongStats(strong_err);*/

    return strong_classifier;
}
/**************************************
 * Function: getStrongError
 * ------------------------
 * calculates error rates at each "level" of the strong classifier; i.e. after
 * adding each weak classifier's weighted vote to the running total
 *
 * Side effects: overwrites the members `dimensions` and `num_features`, and
 * rebuilds `false_indices` with the indices of the features that are still
 * misclassified (false positive or false negative) at the last level.
 *
 * td: Training data to check strong classifier against
 * strong: strong classifier (i.e. ordered set of weak classifiers)
 *
 * returns: a list with one entry per level; each entry holds, in order:
 *      [0] flat error rate  (false_pos + false_neg) / num_features
 *      [1] precision        true_pos / (true_pos + false_pos)
 *      [2] recall           true_pos / (true_pos + false_neg)
 *      [3] true positives   [4] true negatives
 *      [5] false positives  [6] false negatives
 *      A ratio whose denominator is zero is reported as 0.0 rather than NaN.
 */
vector<vector <double> > AdaBooster::getStrongError(TrainingData &td, const WeakClassifierList &strong){
    unsigned int true_pos, false_pos, true_neg, false_neg;
    vector< vector<double> > strong_err;
    vector<double> stats;

    // clear false_indices
    false_indices.clear();

    // set dimensions and number of features
    dimensions = td.dimensions();
    num_features = td.size();

    // running weighted vote for every feature, starting at zero
    vector<double> classify(num_features, 0.0);

    int sign;
    // traverse all weak classifiers
    for (unsigned int i=0; i<strong.size(); i++){
        true_pos = false_pos = true_neg = false_neg = 0;

        // only the final level records which features remain misclassified
        const bool last_level = (i == strong.size()-1);

        // traverse all features
        for (unsigned int j=0; j<num_features; j++){
            // check what the classifier guessed. If weak classifier decided
            // the feature was POS, sign = 1, otherwise sign = -1
            // (a value exactly equal to the threshold always yields -1)
            if ( (strong[i].threshold() > td.at(j,strong[i].dimension()) && !strong[i].isFlipped()) ||
                 (strong[i].threshold() < td.at(j,strong[i].dimension()) && strong[i].isFlipped()) )
                sign = 1;
            else
                sign = -1;

            // calculate classify so far
            classify[j] += strong[i].weight() * sign;

            // check classification against reality
            const bool predicted_pos = (classify[j] >= strong_err_threshold);
            if (predicted_pos && td.val(j) == POS)
                true_pos++;
            else if (predicted_pos && td.val(j) == NEG){
                false_pos++;
                // if we're at the last weak classifier and we still can't
                // classify this point, add its index to false_indices
                if (last_level)
                    false_indices.push_back(j);
            }
            else if (!predicted_pos && td.val(j) == POS){
                false_neg++;
                // similarly, we can't classify the point
                if (last_level)
                    false_indices.push_back(j);
            }
            else
                true_neg++;
        }

        // denominators of the ratios below; any of them may be zero
        const unsigned int predicted_positives = true_pos + false_pos;
        const unsigned int actual_positives = true_pos + false_neg;

        // calculate some stats and push into strong_err
        stats.clear();
        // flat error percentage
        stats.push_back(num_features > 0
                ? static_cast<double>(false_pos + false_neg) / num_features
                : 0.0);
        // precision
        stats.push_back(predicted_positives > 0
                ? static_cast<double>(true_pos) / predicted_positives
                : 0.0);
        // recall
        stats.push_back(actual_positives > 0
                ? static_cast<double>(true_pos) / actual_positives
                : 0.0);
        stats.push_back(static_cast<double>(true_pos));  // true positives
        stats.push_back(static_cast<double>(true_neg));  // true negatives
        stats.push_back(static_cast<double>(false_pos)); // false positives
        stats.push_back(static_cast<double>(false_neg)); // false negatives
        strong_err.push_back(stats);
    }
    return strong_err;
}