/**
 * print_char_choices_list
 */
void print_char_choices_list(const char *msg,
                             const BLOB_CHOICE_LIST_VECTOR &char_choices,
                             const UNICHARSET &current_unicharset,
                             BOOL8 detailed) {
  if (*msg != '\0') tprintf("%s\n", msg);
  for (int x = 0; x < char_choices.length(); ++x) {
    BLOB_CHOICE_IT c_it;
    c_it.set_to_list(char_choices.get(x));
    tprintf("char[%d]: %s\n", x,
            current_unicharset.debug_str( c_it.data()->unichar_id()).string());
    if (detailed)
      print_ratings_list("  ", char_choices.get(x), current_unicharset);
  }
}
Exemple #2
0
/**
 * permute_choices
 *
 * Call append_choices() for each BLOB_CHOICE in BLOB_CHOICE_LIST
 * with the given char_choice_index in char_choices.
 */
void Dict::permute_choices(
    const char *debug,
    const BLOB_CHOICE_LIST_VECTOR &char_choices,
    int char_choice_index,
    const CHAR_FRAGMENT_INFO *prev_char_frag_info,
    WERD_CHOICE *word,
    float certainties[],
    float *limit,
    WERD_CHOICE *best_choice,
    int *attempts_left,
    void *more_args) {
  if (debug) {
    tprintf("%s permute_choices: char_choice_index=%d"
            " limit=%g rating=%g, certainty=%g word=%s\n",
            debug, char_choice_index, *limit, word->rating(),
            word->certainty(), word->debug_string().string());
  }
  if (char_choice_index < char_choices.length()) {
    BLOB_CHOICE_IT blob_choice_it;
    blob_choice_it.set_to_list(char_choices.get(char_choice_index));
    for (blob_choice_it.mark_cycle_pt(); !blob_choice_it.cycled_list();
         blob_choice_it.forward()) {
      (*attempts_left)--;
      append_choices(debug, char_choices, *(blob_choice_it.data()),
                     char_choice_index, prev_char_frag_info, word,
                     certainties, limit, best_choice, attempts_left, more_args);
      if (*attempts_left <= 0) {
        if (debug) tprintf("permute_choices(): attempts_left is 0\n");
        break;
      }
    }
  }
}
Exemple #3
0
/**********************************************************************
 * select_blob_to_split
 *
 * These are the results of the last classification.  Find a likely
 * place to apply splits.
 **********************************************************************/
inT16 Wordrec::select_blob_to_split(const BLOB_CHOICE_LIST_VECTOR &char_choices,
                                    float rating_ceiling,
                                    bool split_next_to_fragment) {
  BLOB_CHOICE_IT blob_choice_it;
  BLOB_CHOICE *blob_choice;
  BLOB_CHOICE_IT temp_it;
  int x;
  float worst = -MAX_FLOAT32;
  int worst_index = -1;
  float worst_near_fragment = -MAX_FLOAT32;
  int worst_index_near_fragment = -1;
  const CHAR_FRAGMENT **fragments = NULL;

  if (chop_debug) {
    if (rating_ceiling < MAX_FLOAT32)
      cprintf("rating_ceiling = %8.4f\n", rating_ceiling);
    else
      cprintf("rating_ceiling = No Limit\n");
  }

  if (split_next_to_fragment && char_choices.length() > 0) {
    fragments = new const CHAR_FRAGMENT *[char_choices.length()];
    if (char_choices.get(0) != NULL) {
      temp_it.set_to_list(char_choices.get(0));
      fragments[0] = getDict().getUnicharset().get_fragment(
          temp_it.data()->unichar_id());
    } else {
      fragments[0] = NULL;
    }
  }

  for (x = 0; x < char_choices.length(); ++x) {
    if (char_choices.get(x) == NULL) {
      if (fragments != NULL) {
        delete[] fragments;
      }
      return x;
    } else {
      blob_choice_it.set_to_list(char_choices.get(x));
      blob_choice = blob_choice_it.data();
      // Populate fragments for the following position.
      if (split_next_to_fragment && x+1 < char_choices.length()) {
        if (char_choices.get(x+1) != NULL) {
          temp_it.set_to_list(char_choices.get(x+1));
          fragments[x+1] = getDict().getUnicharset().get_fragment(
              temp_it.data()->unichar_id());
        } else {
          fragments[x+1] = NULL;
        }
      }
      if (blob_choice->rating() < rating_ceiling &&
          blob_choice->certainty() < tessedit_certainty_threshold) {
        // Update worst and worst_index.
        if (blob_choice->rating() > worst) {
          worst_index = x;
          worst = blob_choice->rating();
        }
        if (split_next_to_fragment) {
          // Update worst_near_fragment and worst_index_near_fragment.
          bool expand_following_fragment =
            (x + 1 < char_choices.length() &&
             fragments[x+1] != NULL && !fragments[x+1]->is_beginning());
          bool expand_preceding_fragment =
            (x > 0 && fragments[x-1] != NULL && !fragments[x-1]->is_ending());
          if ((expand_following_fragment || expand_preceding_fragment) &&
              blob_choice->rating() > worst_near_fragment) {
            worst_index_near_fragment = x;
            worst_near_fragment = blob_choice->rating();
            if (chop_debug) {
              cprintf("worst_index_near_fragment=%d"
                      " expand_following_fragment=%d"
                      " expand_preceding_fragment=%d\n",
                      worst_index_near_fragment,
                      expand_following_fragment,
                      expand_preceding_fragment);
            }
          }
        }
      }
    }
  }
  if (fragments != NULL) {
    delete[] fragments;
  }
  // TODO(daria): maybe a threshold of badness for
  // worst_near_fragment would be useful.
  return worst_index_near_fragment != -1 ?
    worst_index_near_fragment : worst_index;
}