Exemple #1
0
/*********************************
 * Function: getStrongClassifier
 * -----------------------------
 * Builds a strong classifier by repeatedly asking get_best_classifier() for
 * the best weak classifier and re-weighting the training features after each
 * one is selected.
 *
 * trainingData: set of feature vectors. The function works on a local copy
 * 		of this object.
 * num_classifiers: the maximum number of weak classifiers we want to be in
 * 		the strong classifier (20 tends to be a good default it seems)
 *
 * returns: a strong classifier -- the weak classifiers in the order they were
 * 		selected. It holds fewer than num_classifiers entries when
 * 		get_best_classifier() returns a null pointer before the requested
 * 		count is reached.
 *
 * side effects: overwrites the members dimensions and num_features, calls
 * 		td.writeData(tdFile) once per selected classifier, prints a progress
 * 		line to stdout, and releases the *sorted* array before returning.
 */
StrongClassifier AdaBooster::getStrongClassifier(const TrainingData &trainingData, unsigned int num_classifiers){
  // the input is const, so every step below operates on a local copy
  TrainingData td = trainingData;

  // set dimensions and number of features
  dimensions = td.dimensions();
  num_features = td.size();

  // initialize feature weights
  init_feature_weight(td);

  // vector of weak classifiers that make up a strong classifier
  vector<WeakClassifier> strong_classifier;

  // sort by features -- stored in *sorted*, which is released below
  // (presumably allocated by create_feature_views(); confirm against it)
  create_feature_views(td);

  for (unsigned int i = 0; i < num_classifiers; i++){
      // identify best classifier
      WeakClassifier *wc = get_best_classifier();

      // no classifier available: stop early, but leave through the common
      // exit path so that *sorted* is still released (a direct return here
      // used to skip the delete [] below)
      if (!wc)
          break;

      // keep a copy of the best classifier, then release the original
      strong_classifier.push_back(*wc);
      delete wc;

      // don't think this is useful anymore, but I'm scared to delete it
      // without more testing, so it stays for now. TODO: delete this?
      td.writeData(tdFile);

      // update weights of features
      update_feature_weight(td, strong_classifier.back());

      // print out status update (%u: the counter is unsigned)
      printf("\rClassifiers Calculated: %u ", i+1);
      fflush(stdout);
  }

  // delete sorted array
  delete [] sorted;

  // For per-level error reporting while debugging, getStrongError(td,
  // strong_classifier) can be called here and fed to printStrongStats().

  return StrongClassifier(strong_classifier);
}
Exemple #2
0
/**************************************
 * Function: getStrongError
 * ------------------------
 * Calculates error statistics at each "level" of the strong classifier; i.e.
 * after the weighted vote of each successive weak classifier has been added
 * to the running score of every feature.
 *
 * td: Training data to check strong classifier against
 * strong: strong classifier (i.e. ordered set of weak classifiers)
 *
 * returns: one entry per weak classifier, each holding, in order:
 * 		[0] flat error rate  (false_pos + false_neg) / num_features
 * 		[1] precision        true_pos / (true_pos + false_pos)
 * 		[2] recall           true_pos / (true_pos + false_neg)
 * 		[3] true positives   [4] true negatives
 * 		[5] false positives  [6] false negatives
 * 		A ratio whose denominator is zero is reported as 0.0 rather than NaN.
 *
 * side effects: overwrites the members dimensions and num_features, and
 * 		rebuilds false_indices with the indices of the features that are
 * 		still misclassified after the last weak classifier.
 */
vector<vector <double> > AdaBooster::getStrongError(TrainingData &td, const WeakClassifierList &strong){
	vector< vector<double> > strong_err;

	// clear false_indices
	false_indices.clear();

	// set dimensions and number of features
	dimensions = td.dimensions();
	num_features = td.size();

	// running weighted vote for every feature, starting at zero
	vector<double> classify(num_features, 0.0);

	// traverse all weak classifiers
	for (unsigned int i = 0; i < strong.size(); i++){
		unsigned int true_pos = 0;
		unsigned int false_pos = 0;
		unsigned int true_neg = 0;
		unsigned int false_neg = 0;

		// both are constant for the whole inner loop
		const bool flipped = strong[i].isFlipped();
		const bool is_last = (i + 1 == strong.size());

		// traverse all features
		for (unsigned int j = 0; j < num_features; j++){
			// check what the classifier guessed. If weak classifier decided
			// the feature was POS, sign = 1, otherwise sign = -1. A value
			// exactly equal to the threshold always yields -1.
			const bool guessed_pos = flipped
				? (strong[i].threshold() < td.at(j, strong[i].dimension()))
				: (strong[i].threshold() > td.at(j, strong[i].dimension()));
			const int sign = guessed_pos ? 1 : -1;

			// calculate classify so far
			classify[j] += strong[i].weight() * sign;

			// check classification against reality
			if (classify[j] >= strong_err_threshold && td.val(j) == POS)
				true_pos++;
			else if (classify[j] >= strong_err_threshold && td.val(j) == NEG){
				false_pos++;
				// if we're at the last weak classifier and we still can't classify this point
				if (is_last)
					false_indices.push_back(j); // add index to false indices vector
			}
			else if (classify[j] < strong_err_threshold && td.val(j) == POS){
				false_neg++;
				// similarly, we can't classify the point
				if (is_last)
					false_indices.push_back(j);
			}
			else
				true_neg++;
		}

		// denominators of the ratios below; any of them may be zero
		const unsigned int predicted_pos = true_pos + false_pos;
		const unsigned int actual_pos = true_pos + false_neg;

		// calculate some stats and push into strong_err
		vector<double> stats;
		stats.reserve(7);
		stats.push_back(num_features ? static_cast<double>(false_pos + false_neg) / num_features : 0.0); // flat error percentage
		stats.push_back(predicted_pos ? static_cast<double>(true_pos) / predicted_pos : 0.0); // precision
		stats.push_back(actual_pos ? static_cast<double>(true_pos) / actual_pos : 0.0); // recall
		stats.push_back(static_cast<double>(true_pos)); // true positives
		stats.push_back(static_cast<double>(true_neg)); // true negatives
		stats.push_back(static_cast<double>(false_pos)); // false positives
		stats.push_back(static_cast<double>(false_neg)); // false negatives

		strong_err.push_back(stats);
	}
	return strong_err;
}