예제 #1
0
// Sets the report string to a combined human and machine-readable report
// string of the error rates.
// Returns false if there is no data, leaving report unchanged, unless
// even_if_empty is true.
bool ErrorCounter::ReportString(bool even_if_empty, const Counts& counts,
                                STRING* report) {
  // Compute the error rates.
  double rates[CT_SIZE];
  if (!ComputeRates(counts, rates) && !even_if_empty)
    return false;
  // Using %.4g%%, the length of the output string should exactly match the
  // length of the format string, but in case of overflow, allow for +eddd
  // on each number.
  const int kMaxExtraLength = 5;  // Length of +eddd.
  // Keep this format string and the snprintf in sync with the CountTypes enum.
  const char* format_str = "Unichar=%.4g%%[1], %.4g%%[2], %.4g%%[n], %.4g%%[T] "
                           "Mult=%.4g%%, Jn=%.4g%%, Brk=%.4g%%, Rej=%.4g%%, "
                           "FontAttr=%.4g%%, Multi=%.4g%%, "
                           "Answers=%.3g, Rank=%.3g, "
                           "OKjunk=%.4g%%, Badjunk=%.4g%%";
  int max_str_len = strlen(format_str) + kMaxExtraLength * (CT_SIZE - 1) + 1;
  char* formatted_str = new char[max_str_len];
  snprintf(formatted_str, max_str_len, format_str,
           rates[CT_UNICHAR_TOP1_ERR] * 100.0,
           rates[CT_UNICHAR_TOP2_ERR] * 100.0,
           rates[CT_UNICHAR_TOPN_ERR] * 100.0,
           rates[CT_UNICHAR_TOPTOP_ERR] * 100.0,
           rates[CT_OK_MULTI_UNICHAR] * 100.0,
           rates[CT_OK_JOINED] * 100.0,
           rates[CT_OK_BROKEN] * 100.0,
           rates[CT_REJECT] * 100.0,
           rates[CT_FONT_ATTR_ERR] * 100.0,
           rates[CT_OK_MULTI_FONT] * 100.0,
           rates[CT_NUM_RESULTS],
           rates[CT_RANK],
           100.0 * rates[CT_REJECTED_JUNK],
           100.0 * rates[CT_ACCEPTED_JUNK]);
  *report = formatted_str;
  delete [] formatted_str;
  // Now append each field of counts with a tab in front so the result can
  // be loaded into a spreadsheet.
  for (int ct = 0; ct < CT_SIZE; ++ct)
    report->add_str_int("\t", counts.n[ct]);
  return true;
}
예제 #2
0
// Creates a report of the error rate. The report_level controls the detail
// that is reported to stderr via tprintf:
// 0   -> no output.
// >=1 -> bottom-line error rate.
// >=3 -> font-level error rate.
// boosting_mode determines the return value. It selects which (un-weighted)
// error rate to return.
// The fontinfo_table from MasterTrainer provides the names of fonts.
// The it determines the current subset of the training samples.
// If not NULL, the top-choice unichar error rate is saved in unichar_error.
// If not NULL, the report string is saved in fonts_report.
// (Ignoring report_level).
double ErrorCounter::ReportErrors(int report_level, CountTypes boosting_mode,
                                  const UnicityTable<FontInfo>& fontinfo_table,
                                  const SampleIterator& it,
                                  double* unichar_error,
                                  STRING* fonts_report) {
  // Compute totals over all the fonts and report individual font results
  // when required.
  Counts totals;
  int fontsize = font_counts_.size();
  for (int f = 0; f < fontsize; ++f) {
    // Accumulate counts over fonts.
    totals += font_counts_[f];
    STRING font_report;
    if (ReportString(font_counts_[f], &font_report)) {
      if (fonts_report != NULL) {
        *fonts_report += fontinfo_table.get(f).name;
        *fonts_report += ": ";
        *fonts_report += font_report;
        *fonts_report += "\n";
      }
      if (report_level > 2) {
        // Report individual font error rates.
        tprintf("%s: %s\n", fontinfo_table.get(f).name, font_report.string());
      }
    }
  }
  if (report_level > 0) {
    // Report the totals.
    STRING total_report;
    if (ReportString(totals, &total_report)) {
      tprintf("TOTAL Scaled Err=%.4g%%, %s\n",
              scaled_error_ * 100.0, total_report.string());
    }
    // Report the worst substitution error only for now.
    if (totals.n[CT_UNICHAR_TOP1_ERR] > 0) {
      const UNICHARSET& unicharset = it.shape_table()->unicharset();
      int charsetsize = unicharset.size();
      int shapesize = it.CompactCharsetSize();
      int worst_uni_id = 0;
      int worst_shape_id = 0;
      int worst_err = 0;
      for (int u = 0; u < charsetsize; ++u) {
        for (int s = 0; s < shapesize; ++s) {
          if (unichar_counts_(u, s) > worst_err) {
            worst_err = unichar_counts_(u, s);
            worst_uni_id = u;
            worst_shape_id = s;
          }
        }
      }
      if (worst_err > 0) {
        tprintf("Worst error = %d:%s -> %s with %d/%d=%.2f%% errors\n",
                worst_uni_id, unicharset.id_to_unichar(worst_uni_id),
                it.shape_table()->DebugStr(worst_shape_id).string(),
                worst_err, totals.n[CT_UNICHAR_TOP1_ERR],
                100.0 * worst_err / totals.n[CT_UNICHAR_TOP1_ERR]);
      }
    }
  }
  double rates[CT_SIZE];
  if (!ComputeRates(totals, rates))
    return 0.0;
  // Set output values if asked for.
  if (unichar_error != NULL)
    *unichar_error = rates[CT_UNICHAR_TOP1_ERR];
  return rates[boosting_mode];
}
예제 #3
0
// Creates a report of the error rate. The report_level controls the detail
// that is reported to stderr via tprintf:
// 0   -> no output.
// >=1 -> bottom-line error rate.
// >=3 -> font-level error rate.
// boosting_mode determines the return value. It selects which (un-weighted)
// error rate to return.
// The fontinfo_table from MasterTrainer provides the names of fonts.
// The it determines the current subset of the training samples.
// If not NULL, the top-choice unichar error rate is saved in unichar_error.
// If not NULL, the report string is saved in fonts_report.
// (Ignoring report_level).
double ErrorCounter::ReportErrors(int report_level, CountTypes boosting_mode,
                                  const FontInfoTable& fontinfo_table,
                                  const SampleIterator& it,
                                  double* unichar_error,
                                  STRING* fonts_report) {
  // Compute totals over all the fonts and report individual font results
  // when required.
  Counts totals;
  int fontsize = font_counts_.size();
  for (int f = 0; f < fontsize; ++f) {
    // Accumulate counts over fonts.
    totals += font_counts_[f];
    STRING font_report;
    if (ReportString(false, font_counts_[f], &font_report)) {
      if (fonts_report != NULL) {
        *fonts_report += fontinfo_table.get(f).name;
        *fonts_report += ": ";
        *fonts_report += font_report;
        *fonts_report += "\n";
      }
      if (report_level > 2) {
        // Report individual font error rates.
        tprintf("%s: %s\n", fontinfo_table.get(f).name, font_report.string());
      }
    }
  }
  // Report the totals.
  STRING total_report;
  bool any_results = ReportString(true, totals, &total_report);
  if (fonts_report != NULL && fonts_report->length() == 0) {
    // Make sure we return something even if there were no samples.
    *fonts_report = "NoSamplesFound: ";
    *fonts_report += total_report;
    *fonts_report += "\n";
  }
  if (report_level > 0) {
    // Report the totals.
    STRING total_report;
    if (any_results) {
      tprintf("TOTAL Scaled Err=%.4g%%, %s\n",
              scaled_error_ * 100.0, total_report.string());
    }
    // Report the worst substitution error only for now.
    if (totals.n[CT_UNICHAR_TOP1_ERR] > 0) {
      int charsetsize = unicharset_.size();
      int worst_uni_id = 0;
      int worst_result_id = 0;
      int worst_err = 0;
      for (int u = 0; u < charsetsize; ++u) {
        for (int v = 0; v < charsetsize; ++v) {
          if (unichar_counts_(u, v) > worst_err) {
            worst_err = unichar_counts_(u, v);
            worst_uni_id = u;
            worst_result_id = v;
          }
        }
      }
      if (worst_err > 0) {
        tprintf("Worst error = %d:%s -> %s with %d/%d=%.2f%% errors\n",
                worst_uni_id, unicharset_.id_to_unichar(worst_uni_id),
                unicharset_.id_to_unichar(worst_result_id),
                worst_err, totals.n[CT_UNICHAR_TOP1_ERR],
                100.0 * worst_err / totals.n[CT_UNICHAR_TOP1_ERR]);
      }
    }
    tprintf("Multi-unichar shape use:\n");
    for (int u = 0; u < multi_unichar_counts_.size(); ++u) {
      if (multi_unichar_counts_[u] > 0) {
        tprintf("%d multiple answers for unichar: %s\n",
                multi_unichar_counts_[u],
                unicharset_.id_to_unichar(u));
      }
    }
    tprintf("OK Score histogram:\n");
    ok_score_hist_.print();
    tprintf("ERROR Score histogram:\n");
    bad_score_hist_.print();
  }

  double rates[CT_SIZE];
  if (!ComputeRates(totals, rates))
    return 0.0;
  // Set output values if asked for.
  if (unichar_error != NULL)
    *unichar_error = rates[CT_UNICHAR_TOP1_ERR];
  return rates[boosting_mode];
}