// Sets the report string to a combined human and machine-readable report // string of the error rates. // Returns false if there is no data, leaving report unchanged, unless // even_if_empty is true. bool ErrorCounter::ReportString(bool even_if_empty, const Counts& counts, STRING* report) { // Compute the error rates. double rates[CT_SIZE]; if (!ComputeRates(counts, rates) && !even_if_empty) return false; // Using %.4g%%, the length of the output string should exactly match the // length of the format string, but in case of overflow, allow for +eddd // on each number. const int kMaxExtraLength = 5; // Length of +eddd. // Keep this format string and the snprintf in sync with the CountTypes enum. const char* format_str = "Unichar=%.4g%%[1], %.4g%%[2], %.4g%%[n], %.4g%%[T] " "Mult=%.4g%%, Jn=%.4g%%, Brk=%.4g%%, Rej=%.4g%%, " "FontAttr=%.4g%%, Multi=%.4g%%, " "Answers=%.3g, Rank=%.3g, " "OKjunk=%.4g%%, Badjunk=%.4g%%"; int max_str_len = strlen(format_str) + kMaxExtraLength * (CT_SIZE - 1) + 1; char* formatted_str = new char[max_str_len]; snprintf(formatted_str, max_str_len, format_str, rates[CT_UNICHAR_TOP1_ERR] * 100.0, rates[CT_UNICHAR_TOP2_ERR] * 100.0, rates[CT_UNICHAR_TOPN_ERR] * 100.0, rates[CT_UNICHAR_TOPTOP_ERR] * 100.0, rates[CT_OK_MULTI_UNICHAR] * 100.0, rates[CT_OK_JOINED] * 100.0, rates[CT_OK_BROKEN] * 100.0, rates[CT_REJECT] * 100.0, rates[CT_FONT_ATTR_ERR] * 100.0, rates[CT_OK_MULTI_FONT] * 100.0, rates[CT_NUM_RESULTS], rates[CT_RANK], 100.0 * rates[CT_REJECTED_JUNK], 100.0 * rates[CT_ACCEPTED_JUNK]); *report = formatted_str; delete [] formatted_str; // Now append each field of counts with a tab in front so the result can // be loaded into a spreadsheet. for (int ct = 0; ct < CT_SIZE; ++ct) report->add_str_int("\t", counts.n[ct]); return true; }
// Creates a report of the error rate. The report_level controls the detail
// that is reported to stderr via tprintf:
// 0 -> no output.
// >=1 -> bottom-line error rate.
// >=3 -> font-level error rate.
// boosting_mode determines the return value. It selects which (un-weighted)
// error rate to return.
// The fontinfo_table from MasterTrainer provides the names of fonts.
// The it determines the current subset of the training samples.
// If not NULL, the top-choice unichar error rate is saved in unichar_error.
// If not NULL, the report string is saved in fonts_report.
// (Ignoring report_level).
// NOTE(review): this calls a 2-argument ReportString, which does not match
// the 3-argument (even_if_empty) overload defined elsewhere in this file —
// confirm which version of ReportString/ReportErrors is the current one.
double ErrorCounter::ReportErrors(int report_level, CountTypes boosting_mode,
                                  const UnicityTable<FontInfo>& fontinfo_table,
                                  const SampleIterator& it,
                                  double* unichar_error,
                                  STRING* fonts_report) {
  // Compute totals over all the fonts and report individual font results
  // when required.
  Counts totals;
  int fontsize = font_counts_.size();
  for (int f = 0; f < fontsize; ++f) {
    // Accumulate counts over fonts.
    totals += font_counts_[f];
    STRING font_report;
    // ReportString returns false when the font has no data, in which case
    // this font is silently skipped in both outputs below.
    if (ReportString(font_counts_[f], &font_report)) {
      if (fonts_report != NULL) {
        // Per-font line: "<fontname>: <report>\n".
        *fonts_report += fontinfo_table.get(f).name;
        *fonts_report += ": ";
        *fonts_report += font_report;
        *fonts_report += "\n";
      }
      if (report_level > 2) {
        // Report individual font error rates.
        tprintf("%s: %s\n", fontinfo_table.get(f).name, font_report.string());
      }
    }
  }
  if (report_level > 0) {
    // Report the totals.
    STRING total_report;
    if (ReportString(totals, &total_report)) {
      tprintf("TOTAL Scaled Err=%.4g%%, %s\n",
              scaled_error_ * 100.0, total_report.string());
    }
    // Report the worst substitution error only for now.
    if (totals.n[CT_UNICHAR_TOP1_ERR] > 0) {
      const UNICHARSET& unicharset = it.shape_table()->unicharset();
      int charsetsize = unicharset.size();
      int shapesize = it.CompactCharsetSize();
      int worst_uni_id = 0;
      int worst_shape_id = 0;
      int worst_err = 0;
      // Exhaustive scan of the (truth unichar, answered shape) confusion
      // matrix for the single most frequent substitution.
      for (int u = 0; u < charsetsize; ++u) {
        for (int s = 0; s < shapesize; ++s) {
          if (unichar_counts_(u, s) > worst_err) {
            worst_err = unichar_counts_(u, s);
            worst_uni_id = u;
            worst_shape_id = s;
          }
        }
      }
      if (worst_err > 0) {
        // Report the worst confusion as a fraction of all top-1 errors.
        tprintf("Worst error = %d:%s -> %s with %d/%d=%.2f%% errors\n",
                worst_uni_id, unicharset.id_to_unichar(worst_uni_id),
                it.shape_table()->DebugStr(worst_shape_id).string(),
                worst_err, totals.n[CT_UNICHAR_TOP1_ERR],
                100.0 * worst_err / totals.n[CT_UNICHAR_TOP1_ERR]);
      }
    }
  }
  // Convert the accumulated totals to rates; with no data there is nothing
  // meaningful to return.
  double rates[CT_SIZE];
  if (!ComputeRates(totals, rates))
    return 0.0;
  // Set output values if asked for.
  if (unichar_error != NULL)
    *unichar_error = rates[CT_UNICHAR_TOP1_ERR];
  return rates[boosting_mode];
}
// Creates a report of the error rate. The report_level controls the detail // that is reported to stderr via tprintf: // 0 -> no output. // >=1 -> bottom-line error rate. // >=3 -> font-level error rate. // boosting_mode determines the return value. It selects which (un-weighted) // error rate to return. // The fontinfo_table from MasterTrainer provides the names of fonts. // The it determines the current subset of the training samples. // If not NULL, the top-choice unichar error rate is saved in unichar_error. // If not NULL, the report string is saved in fonts_report. // (Ignoring report_level). double ErrorCounter::ReportErrors(int report_level, CountTypes boosting_mode, const FontInfoTable& fontinfo_table, const SampleIterator& it, double* unichar_error, STRING* fonts_report) { // Compute totals over all the fonts and report individual font results // when required. Counts totals; int fontsize = font_counts_.size(); for (int f = 0; f < fontsize; ++f) { // Accumulate counts over fonts. totals += font_counts_[f]; STRING font_report; if (ReportString(false, font_counts_[f], &font_report)) { if (fonts_report != NULL) { *fonts_report += fontinfo_table.get(f).name; *fonts_report += ": "; *fonts_report += font_report; *fonts_report += "\n"; } if (report_level > 2) { // Report individual font error rates. tprintf("%s: %s\n", fontinfo_table.get(f).name, font_report.string()); } } } // Report the totals. STRING total_report; bool any_results = ReportString(true, totals, &total_report); if (fonts_report != NULL && fonts_report->length() == 0) { // Make sure we return something even if there were no samples. *fonts_report = "NoSamplesFound: "; *fonts_report += total_report; *fonts_report += "\n"; } if (report_level > 0) { // Report the totals. STRING total_report; if (any_results) { tprintf("TOTAL Scaled Err=%.4g%%, %s\n", scaled_error_ * 100.0, total_report.string()); } // Report the worst substitution error only for now. 
if (totals.n[CT_UNICHAR_TOP1_ERR] > 0) { int charsetsize = unicharset_.size(); int worst_uni_id = 0; int worst_result_id = 0; int worst_err = 0; for (int u = 0; u < charsetsize; ++u) { for (int v = 0; v < charsetsize; ++v) { if (unichar_counts_(u, v) > worst_err) { worst_err = unichar_counts_(u, v); worst_uni_id = u; worst_result_id = v; } } } if (worst_err > 0) { tprintf("Worst error = %d:%s -> %s with %d/%d=%.2f%% errors\n", worst_uni_id, unicharset_.id_to_unichar(worst_uni_id), unicharset_.id_to_unichar(worst_result_id), worst_err, totals.n[CT_UNICHAR_TOP1_ERR], 100.0 * worst_err / totals.n[CT_UNICHAR_TOP1_ERR]); } } tprintf("Multi-unichar shape use:\n"); for (int u = 0; u < multi_unichar_counts_.size(); ++u) { if (multi_unichar_counts_[u] > 0) { tprintf("%d multiple answers for unichar: %s\n", multi_unichar_counts_[u], unicharset_.id_to_unichar(u)); } } tprintf("OK Score histogram:\n"); ok_score_hist_.print(); tprintf("ERROR Score histogram:\n"); bad_score_hist_.print(); } double rates[CT_SIZE]; if (!ComputeRates(totals, rates)) return 0.0; // Set output values if asked for. if (unichar_error != NULL) *unichar_error = rates[CT_UNICHAR_TOP1_ERR]; return rates[boosting_mode]; }