Exemplo n.º 1
0
/**
 * @name cc_recog
 *
 * Recognize a single word.
 *
 * The error trap is armed first; when SetErrorTrap() reports non-zero the
 * trap is released, the string of best_choice is cleared and NULL is
 * returned.  Otherwise the dictionary's choice accumulator and hyphen
 * variables are reset, the match table is initialized, and the word is
 * handed to chop_word_main() together with the lowest index fx (below
 * MAX_FX) whose FXSELECT bit is set in acts[OCR] (MAX_FX if none is set).
 *
 * @param tessword           word passed on to chop_word_main()
 * @param best_choice        passed on to chop_word_main(); its string is
 *                           set to NULL when the error trap fires
 * @param best_raw_choice    passed on to chop_word_main()
 * @param tester             passed on to chop_word_main()
 * @param trainer            passed on to chop_word_main()
 * @param last_word_on_line  passed on to reset_hyphen_vars()
 *
 * @return whatever chop_word_main() returned, or NULL when the error
 *         trap fired.
 */
BLOB_CHOICE_LIST_VECTOR *Wordrec::cc_recog(TWERD *tessword,
                                           WERD_CHOICE *best_choice,
                                           WERD_CHOICE *best_raw_choice,
                                           BOOL8 tester,
                                           BOOL8 trainer,
                                           bool last_word_on_line) {
  // Bail out early if the error trap reports a failure.
  if (SetErrorTrap(NULL)) {
    cprintf("Tess copped out!\n");
    ReleaseErrorTrap();
    class_string(best_choice) = NULL;
    return NULL;
  }

  getDict().InitChoiceAccum();
  getDict().reset_hyphen_vars(last_word_on_line);
  init_match_table();

  // Scan for the first selected FX bit; ends at MAX_FX when none is set.
  int fx_index = 0;
  while (fx_index < MAX_FX && (acts[OCR] & (FXSELECT << fx_index)) == 0) {
    ++fx_index;
  }

  BLOB_CHOICE_LIST_VECTOR *word_results = chop_word_main(tessword,
                                                         fx_index,
                                                         best_choice,
                                                         best_raw_choice,
                                                         tester,
                                                         trainer);
  getDict().DebugWordChoices();
  ReleaseErrorTrap();
  return word_results;
}
Exemplo n.º 2
0
/**********************************************************************
 * print_matrix
 *
 * Dump up to three leading choices for every classified cell of the
 * match rating matrix.  Cells are visited diagonal by diagonal: for each
 * offset 0..dimension-1, every cell [col, col + offset] is examined and
 * cells equal to NOT_CLASSIFIED are skipped.  A line is terminated either
 * by the third choice's format string or by new_line() when there is no
 * third choice.
 **********************************************************************/
void print_matrix(MATRIX rating_matrix) {
  int size;
  int offset;
  int col;

  cprintf("Ratings Matrix (top choices)\n");

  size = matrix_dimension(rating_matrix);
  /* Walk one diagonal per outer iteration */
  for (offset = 0; offset < size; offset++) {
    /* Walk the cells that lie on this diagonal */
    for (col = 0; col < size - offset; col++) {
      int row = col + offset;
      CHOICES cell = matrix_get(rating_matrix, col, row);

      /* Nothing to print for an unclassified cell */
      if (cell == NOT_CLASSIFIED) {
        continue;
      }

      cprintf("\t[%d,%d] : ", col, row);

      if (first_node(cell)) {
        cprintf("%-10s%4.0f\t|\t",
                class_string(first_node(cell)),
                class_probability(first_node(cell)));
      }

      if (second_node(cell)) {
        cprintf("%-10s%4.0f\t|\t",
                class_string(second_node(cell)),
                class_probability(second_node(cell)));
      }

      if (third(cell)) {
        cprintf("%-10s%4.0f\n",
                class_string(third(cell)),
                class_probability(third(cell)));
      } else {
        new_line();
      }
    }
  }
}
Exemplo n.º 3
0
/**
 * copy_choices
 *
 * Copy a list of choices.  This means that there will be two copies
 * in memory.
 */
CHOICES copy_choices(CHOICES choices) {
  CHOICES l;
  CHOICES result = NIL;

  iterate_list(l, choices) {
    A_CHOICE *choice = (A_CHOICE *)(first_node(l));
    result = push (result,
      (LIST) new_choice (class_string(choice),
                         class_lengths(choice),
                         class_rating(choice),
                         class_certainty(choice),
                         class_config(choice),
                         class_script_id(choice),
                         class_permuter(choice),
                         class_fragment_mark(choice),
                         class_fragment_lengths(choice)));
  }
Exemplo n.º 4
0
/**********************************************************************
 * save_answer
 *
 * Write an answer to the output file that is the raw guess (without
 * context) directly from the classifier.
 *
 * word        - word being reported; its guess field is read and written.
 * row         - row of this word; compared with the row seen on the
 *               previous call to decide whether a new output line starts.
 * best_choice - best choice for the word, may be NULL.  Its string may
 *               be freed here, in which case it is reset to NULL.
 * raw_choice  - raw classifier choice; its string is stored/written
 *               alongside the best choice.
 * firstpass   - non-zero selects the first-pass branch, zero the second.
 *
 * First pass: when an answer string exists it is registered with
 * add_document_word(), passed through fix_quotes() and its certainty is
 * recorded.  If AcceptableResult() accepted it, word->guess becomes a
 * freshly saved buffer laid out as "<best>\0<raw>\0" so that the second
 * pass can find the raw string right after the terminator of the best
 * string.  Otherwise the string is freed and word->guess is NULL.
 *
 * Second pass: a guess stored by the first pass is written out together
 * with the raw string stored behind it; otherwise the current best
 * choice (if any) is recorded and written.
 *
 * When display_text is set the guess (or, failing that, the local copy
 * of the first-pass answer) is echoed via cprintf().
 **********************************************************************/
void save_answer(TWERD *word,
                 TEXTROW *row,
                 A_CHOICE *best_choice,
                 A_CHOICE *raw_choice,
                 int firstpass) {
  static TEXTROW *last_row;
  char raw_answer[CHARS_PER_LINE];
  int answer_already;
  int good_answer;
  char *string = NULL;
  size_t best_len;

  /* The display code below may print raw_answer on paths that never
     fill it in (every second-pass call), so it must start out empty. */
  raw_answer[0] = '\0';

  if (best_choice) {
    good_answer = AcceptableResult (best_choice, raw_choice);
    string = class_string (best_choice);
  }
  else {
    good_answer = FALSE;
  }

  if (firstpass) {
                                 /* First pass */
    if (string) {
                                 /* Got answer */
      add_document_word(best_choice);

      word->guess = string;
      fix_quotes (word->guess);
      /* Bounded copy.  One byte is held back so that the separator
         appended below always fits, which keeps the "<best>\0<raw>\0"
         layout valid even for an over-long answer (assumes
         CHARS_PER_LINE is at least 3). */
      snprintf (raw_answer, sizeof raw_answer - 1, "%s", word->guess);
      best_len = strlen (raw_answer);

      record_certainty (class_certainty (best_choice), 1);

      if (good_answer) {
        record_certainty (class_certainty (best_choice), 2);
        /* Append " <raw>", truncating instead of overflowing. */
        snprintf (raw_answer + best_len, sizeof raw_answer - best_len,
          " %s", class_string (raw_choice));
        word->guess = strsave (raw_answer);
        /* Turn the separating blank into a terminator: best\0raw\0 */
        word->guess[best_len] = 0;
        /* The saved copy replaces the original string. */
        strfree(string);
        class_string (best_choice) = NULL;
      }
      else {
                                 /* Not good enough */
        /* word->guess aliases the string of best_choice here, so the
           choice must not keep pointing at the freed buffer. */
        strfree (string);
        class_string (best_choice) = NULL;
        word->guess = NULL;
      }
    }
    else {
      word->guess = NULL;
    }
  }
  else {
                                 /* Second pass */
    answer_already = (word->guess != NULL);
    if (answer_already) {
      /* The raw string sits right behind the best string's NUL. */
      write_text_files (word,
        &word->guess[strlen (word->guess) + 1],
        (row != last_row), TRUE, TRUE);
    }
    else {
                                 /* Required second pass */
      if (string) {
        if (!good_answer && tessedit_save_stats) {
          SaveBadWord (string, class_certainty (best_choice));
        }
        record_certainty (class_certainty (best_choice), 2);
        word->guess = class_string (best_choice);
        fix_quotes (word->guess);
        write_text_files (word, class_string (raw_choice),
          (row != last_row), good_answer, FALSE);
      }
    }
  }
  /* Word Display */
  if (display_text) {
    if (row != last_row)
      cprintf ("\n");
    if (word->guess && strlen (word->guess))
      cprintf ("%s ", word->guess);
    else
      cprintf ("%s ", raw_answer);
    fflush(stdout);
  }

  last_row = row;
}
Exemplo n.º 5
0
/**********************************************************************
 * chop_word_main
 *
 * Classify every blob of the word, permute the per-blob results into a
 * word choice and, when that choice is not acceptable (or, in tester /
 * trainer mode, differs from word->correct), try to improve it by
 * chopping blobs and then by running the word associator.  Returns the
 * word level ratings, i.e. the per-character choice list, rebuilt from
 * the final state whenever chopping took place or tester / trainer is
 * set.
 *
 * best_choice and raw_choice are reset with set_null_choice() on entry
 * and filled in by the permuter.  best_state and chop_states are static
 * and therefore persist between calls.
 **********************************************************************/
CHOICES_LIST chop_word_main(register TWERD *word,
                            int fx,
                            A_CHOICE *best_choice,
                            A_CHOICE *raw_choice,
                            BOOL8 tester,
                            BOOL8 trainer) {
  static STATE best_state;
  static STATE chop_states[64];  /* in between states */
  CHOICES_LIST blob_choices;
  SEAMS seams = NULL;
  MATRIX ratings = NULL;
  DANGERR fixpt;                 /* dangerous ambig */
  STATE state;
  TBLOB *prev_blob = NULL;
  TBLOB *cur_blob;
  float rating_limit = 1000.0;
  INT32 state_count = 0;         /* no of states */
  INT32 bit_count;               /* no of bits */
  int blob_index = 0;
  int did_chopping = 0;

  set_null_choice(best_choice);
  set_null_choice(raw_choice);

  blob_choices = new_choice_list();

  /* Classify each blob, giving the classifier its two neighbours. */
  cur_blob = word->blobs;
  while (cur_blob != NULL) {
    CHOICES match_result =
        (CHOICES) classify_blob(prev_blob, cur_blob, cur_blob->next, NULL, fx,
                                "chop_word:", Green, &chop_states[0],
                                &best_state, matcher_pass, blob_index);
    blob_choices = array_push(blob_choices, match_result);
    prev_blob = cur_blob;
    cur_blob = cur_blob->next;
    blob_index++;
  }
  bit_count = blob_index - 1;
  permute_characters(blob_choices, rating_limit, best_choice, raw_choice);

  set_n_ones(&state, array_count(blob_choices) - 1);
  if (matcher_fp != NULL) {
    if (matcher_pass == 0) {
      bits_in_states = bit_count;
      chop_states[state_count] = state;
    }
    state_count++;
  }

  /* && binds tighter than ||; the parentheses only make that explicit. */
  if (!AcceptableChoice(blob_choices, best_choice, raw_choice, &fixpt) ||
      ((tester || trainer) &&
       strcmp(word->correct, class_string(best_choice)))) {
    did_chopping = 1;
    if (first_pass) {
      words_chopped1++;
    } else {
      words_chopped2++;
    }

    seams = start_seam_list(word->blobs);

    if (chop_enable) {
      improve_by_chopping(word, &blob_choices, fx, &state,
                          best_choice, raw_choice,
                          &seams, &fixpt,
                          chop_states, &state_count,
                          &best_state, matcher_pass);
    }

    if (chop_debug) {
      print_seams("Final seam list:", seams);
    }

    if ((enable_assoc &&
         !AcceptableChoice(blob_choices, best_choice, raw_choice, NULL)) ||
        ((tester || trainer) &&
         strcmp(word->correct, class_string(best_choice)))) {
      ratings = word_associator(word->blobs, seams, &state, fx,
                                best_choice, raw_choice, word->correct,
                                &fixpt, &best_state, matcher_pass);
    }
    bits_in_states = bit_count + state_count - 1;
  }

  if (ratings != NULL) {
    free_matrix(ratings);
  }
  if (did_chopping || tester || trainer) {
    blob_choices = rebuild_current_state(word->blobs, seams, &state,
                                         blob_choices, fx);
  }
  if (seams != NULL) {
    free_seam_list(seams);
  }
  if (matcher_fp != NULL) {
    best_state = state;
  }
  FilterWordChoices();
  return blob_choices;
}