void ConfusionMatrix::print_summary() { // overall counts and summary cout.precision(4); cout << "== Summary ==" << endl; cout << setw(23) <<"Correctly classified:" << setw(12) << right << correct << setw(10) << right << accuracy() * 100 << "%" << endl; cout << setw(23) << "Incorrectly classified:" << setw(12) << right << incorrect << setw(10) << right << error() * 100 << "%" << endl; cout << setw(23) << "Total classifications:" << setw(12) << right << correct + incorrect << endl << endl; // determine the width of the left (category name) column int max_name_length = 0; for(int category = 1; category <= data_set->categories_size(); category++) if(data_set->category_feature()->names[category].length() > max_name_length) max_name_length = data_set->category_feature()->names[category].length(); if(average_row_name.length() > max_name_length) max_name_length = average_row_name.length(); max_name_length += 1; // detailed category information cout << "== Category Performance ==" << endl; cout << setw(max_name_length) << ""; cout << setw(9) << right << "True +"; cout << setw(9) << right << "False +"; cout << setw(9) << right << "True -"; cout << setw(9) << right << "False -"; cout << setw(9) << right << "Precis."; cout << setw(9) << right << "Recall"; cout << setw(9) << right << "F-score" << endl; for(int category = 1; category <= data_set->categories_size(); category++) { cout << setw(max_name_length) << data_set->category_feature()->names[category]; cout << setw(9) << tp(category); cout << setw(9) << fp(category); cout << setw(9) << tn(category); cout << setw(9) << fn(category); cout << setw(8) << precision(category) * 100 << "%"; cout << setw(8) << recall(category) * 100 << "%"; cout << setw(8) << fscore(category) * 100 << "%" << endl; } cout << setw(max_name_length) << average_row_name; cout << setw(9) << avg_tp(); cout << setw(9) << avg_fp(); cout << setw(9) << avg_tn(); cout << setw(9) << avg_fn(); cout << setw(8) << avg_precision() * 100 << "%"; cout << setw(8) << avg_recall() * 100 << "%"; cout << setw(8) << avg_fscore() * 100 << "%" << endl; }
/** Runs the precision/recall test. * May log errors and even end the application in case of severe error. * @param params The program options. */ void eval_precision_recall( const program_options& params) { LOG(info) << "Loading class membership mappings..."; Vec1UInt membership_mappings; Vec1str cluster_files; exit_if_false( from_file( params.membership_mappings_file, membership_mappings), RETURN_CODE::IO_ERROR); exit_if_false( from_file( params.cluster_file_paths_file, cluster_files), RETURN_CODE::IO_ERROR); const uint n_features = static_cast<uint>(membership_mappings.size()); const uint n_clusters = static_cast<uint>(cluster_files.size()); LOG(info) << "# features: " << n_features; LOG(info) << "# clusters: " << n_clusters; LOG( info) << "calculating precision / recall ..."; // find best class for each cluster vector<cluster_info_t> class_mapping; for( uint i=0; i<n_clusters; ++i) { Vec1str current_cluster_image_paths; Vec1str current_cluster_real_image_classes; std::map<string, uint> class_votes; from_file( cluster_files[i], current_cluster_image_paths); for( auto it=current_cluster_image_paths.begin(); it!=current_cluster_image_paths.end(); ++it) { const string class_name = bfs::path(*it).parent_path().filename().string(); current_cluster_real_image_classes.push_back( class_name); const auto map_it = class_votes.find( class_name); if( map_it == class_votes.end()) class_votes[class_name] = 1; else map_it->second += 1; } const auto max_it = std::max_element( class_votes.begin(), class_votes.end(), []( const std::pair<string, int>& p, const std::pair<string, int>& q) { return p.second < q.second; }); if( max_it == class_votes.end()) { // ***cluster empty *** (yes, that can happen!) continue; } const string assigned_class( max_it->first); const uint n_retrieved_images( static_cast<uint>(current_cluster_image_paths.size())); uint false_positives(0); uint true_positives(0); uint false_negatives(0); // find true positives, false positives for( auto it=current_cluster_real_image_classes.begin(); it!=current_cluster_real_image_classes.end(); ++it) { const string& real_class = *it; if( assigned_class.compare( real_class) == 0) { ++true_positives; } else { ++false_positives; } } // false negatives std::stringstream folder_name; folder_name << params.image_db_directory << '/' << assigned_class; bfs::path folder_path( folder_name.str()); assert( bfs::exists( folder_path) && "the directory must exist."); uint n_relevant_images(0); for( bfs::directory_iterator it(folder_path); it!=bfs::directory_iterator(); ++it) { bfs::path p(*it); p.make_preferred(); if( !p.has_extension() || !is_image_filetype_supported( p.extension().string())) continue; ++n_relevant_images; Vec1str::iterator pos = std::find( current_cluster_image_paths.begin(), current_cluster_image_paths.end(), p.string()); if( pos == current_cluster_image_paths.end()) ++false_negatives; } assert( true_positives + false_negatives == n_relevant_images && "number of relevant images must be identical to the number of true positivies and false positives"); // *** found true positivies, false positives, false negatives for cluster i *** // calc precision/recall for each class const real precision = static_cast<real>(true_positives) / n_retrieved_images; const real recall = static_cast<real>(true_positives) / n_relevant_images; class_mapping.push_back( cluster_info_t( assigned_class, true_positives, false_positives, false_negatives, precision, recall)); } real avg_true_positives(0); real avg_false_positives(0); real avg_false_negatives(0); real avg_precision(0); real avg_recall(0); LOG( info) << "<class name> <true positives> <false positives> <false negatives> <precision> <recall>"; for( uint i=0; i<class_mapping.size(); ++i) { const cluster_info_t& ci = class_mapping[i]; const string& cluster_name = std::get<0>(ci); const uint true_positives = std::get<1>(ci); const uint false_positives = std::get<2>(ci); const uint false_negatives = std::get<3>(ci); const real precision = std::get<4>(ci); const real recall = std::get<5>(ci); avg_true_positives += true_positives; avg_false_positives += false_positives; avg_false_negatives += false_negatives; avg_precision += precision; avg_recall += recall; LOG(info) << cluster_name << " " << true_positives << " " << false_positives << " " << false_negatives << " " << precision << " " << recall; } avg_true_positives /= class_mapping.size(); avg_false_positives /= class_mapping.size(); avg_false_negatives /= class_mapping.size(); avg_precision /= class_mapping.size(); avg_recall /= class_mapping.size(); LOG(info) << "Average: <true positives> <false positives> <false negatives> <precision> <recall>"; LOG(info) << avg_true_positives << " " << avg_false_positives << " " << avg_false_negatives << " " << avg_precision << " " << avg_recall; LOG(info) << "Writing stats to file \"" << params.precision_recall_file << "\"..."; to_file( params.precision_recall_file, class_mapping); }