Ejemplo n.º 1
0
/**********************************************************************
 * run_cube_combiner
 *
 * Iterates through tesseract's results and calls cube on each word,
 * combining the results with the existing tesseract result.
 **********************************************************************/
void Tesseract::run_cube_combiner(PAGE_RES *page_res) {
  if (page_res == NULL || tess_cube_combiner_ == NULL)
    return;
  PAGE_RES_IT page_res_it(page_res);
  // Iterate through the word results and call cube on each word.
  for (page_res_it.restart_page(); page_res_it.word () != NULL;
       page_res_it.forward()) {
    BLOCK* block = page_res_it.block()->block;
    if (block->poly_block() != NULL && !block->poly_block()->IsText())
      continue;  // Don't deal with non-text blocks.
    WERD_RES* word = page_res_it.word();
    // Skip cube entirely if tesseract's certainty is greater than threshold.
    int combiner_run_thresh = convert_prob_to_tess_certainty(
        cube_cntxt_->Params()->CombinerRunThresh());
    if (word->best_choice->certainty() >= combiner_run_thresh) {
      continue;
    }
    // Use the same language as Tesseract used for the word.
    Tesseract* lang_tess = word->tesseract;

    // Setup a trial WERD_RES in which to classify with cube.
    WERD_RES cube_word;
    cube_word.InitForRetryRecognition(*word);
    cube_word.SetupForRecognition(lang_tess->unicharset, this, BestPix(),
                                  OEM_CUBE_ONLY,
                                  NULL, false, false, false,
                                  page_res_it.row()->row,
                                  page_res_it.block()->block);
    CubeObject *cube_obj = lang_tess->cube_recognize_word(
        page_res_it.block()->block, &cube_word);
    if (cube_obj != NULL)
      lang_tess->cube_combine_word(cube_obj, &cube_word, word);
    delete cube_obj;
  }
}
Ejemplo n.º 2
0
/**********************************************************************
 * run_cube_combiner
 *
 * Iterates through tesseract's results and calls cube on each word,
 * combining the results with the existing tesseract result.
 **********************************************************************/
void Tesseract::run_cube_combiner(PAGE_RES *page_res) {
  if (page_res == NULL || tess_cube_combiner_ == NULL)
    return;
  PAGE_RES_IT page_res_it(page_res);
  // Iterate through the word results and call cube on each word.
  for (page_res_it.restart_page(); page_res_it.word () != NULL;
       page_res_it.forward()) {
    WERD_RES* word = page_res_it.word();
    // Skip cube entirely if tesseract's certainty is greater than threshold.
    int combiner_run_thresh = convert_prob_to_tess_certainty(
        cube_cntxt_->Params()->CombinerRunThresh());
    if (word->best_choice->certainty() >= combiner_run_thresh) {
      continue;
    }
    // Use the same language as Tesseract used for the word.
    Tesseract* lang_tess = word->tesseract;

    // Setup a trial WERD_RES in which to classify with cube.
    WERD_RES cube_word;
    cube_word.InitForRetryRecognition(*word);
    CubeObject *cube_obj = lang_tess->cube_recognize_word(
        page_res_it.block()->block, &cube_word);
    if (cube_obj != NULL)
      lang_tess->cube_combine_word(cube_obj, &cube_word, word);
    delete cube_obj;
  }
}
Ejemplo n.º 3
0
/**********************************************************************
 * cube_recognize
 *
 * Call cube on the current word, and write the result to word.
 * Sets up a fake result and returns false if something goes wrong.
 **********************************************************************/
bool Tesseract::cube_recognize(CubeObject *cube_obj, BLOCK* block,
                               WERD_RES *word) {
  // Run cube
  WordAltList *cube_alt_list = cube_obj->RecognizeWord();
  if (!cube_alt_list || cube_alt_list->AltCount() <= 0) {
    if (cube_debug_level > 0) {
      tprintf("Cube returned nothing for word at:");
      word->word->bounding_box().print();
    }
    word->SetupFake(unicharset);
    return false;
  }

  // Get cube's best result and its probability, mapped to tesseract's
  // certainty range
  char_32 *cube_best_32 = cube_alt_list->Alt(0);
  double cube_prob = CubeUtils::Cost2Prob(cube_alt_list->AltCost(0));
  float cube_certainty = convert_prob_to_tess_certainty(cube_prob);
  string cube_best_str;
  CubeUtils::UTF32ToUTF8(cube_best_32, &cube_best_str);

  // Retrieve Cube's character bounding boxes and CharSamples,
  // corresponding to the most recent call to RecognizeWord().
  Boxa *char_boxes = NULL;
  CharSamp **char_samples = NULL;;
  int num_chars;
  if (!extract_cube_state(cube_obj, &num_chars, &char_boxes, &char_samples)
      && cube_debug_level > 0) {
    tprintf("Cube WARNING (Tesseract::cube_recognize): Cannot extract "
            "cube state.\n");
    word->SetupFake(unicharset);
    return false;
  }

  // Convert cube's character bounding boxes to a BoxWord.
  BoxWord cube_box_word;
  TBOX tess_word_box = word->word->bounding_box();
  if (word->denorm.block() != NULL)
    tess_word_box.rotate(word->denorm.block()->re_rotation());
  bool box_word_success = create_cube_box_word(char_boxes, num_chars,
                                               tess_word_box,
                                               &cube_box_word);
  boxaDestroy(&char_boxes);
  if (!box_word_success) {
    if (cube_debug_level > 0) {
      tprintf("Cube WARNING (Tesseract::cube_recognize): Could not "
              "create cube BoxWord\n");
    }
    word->SetupFake(unicharset);
    return false;
  }

  // Fill tesseract result's fields with cube results
  fill_werd_res(cube_box_word, cube_best_str.c_str(), word);

  // Create cube's best choice.
  BLOB_CHOICE** choices = new BLOB_CHOICE*[num_chars];
  for (int i = 0; i < num_chars; ++i) {
    UNICHAR_ID uch_id =
        cube_cntxt_->CharacterSet()->UnicharID(char_samples[i]->StrLabel());
    choices[i] = new BLOB_CHOICE(uch_id, -cube_certainty, cube_certainty,
                                 -1, -1, 0, 0, 0, 0, BCC_STATIC_CLASSIFIER);
  }
  word->FakeClassifyWord(num_chars, choices);
  // within a word, cube recognizes the word in reading order.
  word->best_choice->set_unichars_in_script_order(true);
  delete [] choices;
  delete [] char_samples;

  // Some sanity checks
  ASSERT_HOST(word->best_choice->length() == word->reject_map.length());

  if (cube_debug_level || classify_debug_level) {
    tprintf("Cube result: %s r=%g, c=%g\n",
            word->best_choice->unichar_string().string(),
            word->best_choice->rating(),
            word->best_choice->certainty());
  }
  return true;
}
Ejemplo n.º 4
0
/**********************************************************************
 * cube_recognize
 *
 * Call cube on the current word, and write the result to word.
 * Sets up a fake result and returns false if something goes wrong.
 **********************************************************************/
bool Tesseract::cube_recognize(CubeObject *cube_obj, BLOCK* block,
                               WERD_RES *word) {
  if (!word->SetupForCubeRecognition(unicharset, this, block)) {
    return false;  // Graphics block.
  }

  // Run cube
  WordAltList *cube_alt_list = cube_obj->RecognizeWord();
  if (!cube_alt_list || cube_alt_list->AltCount() <= 0) {
    if (cube_debug_level > 0) {
      tprintf("Cube returned nothing for word at:");
      word->word->bounding_box().print();
    }
    word->SetupFake(unicharset);
    return false;
  }

  // Get cube's best result and its probability, mapped to tesseract's
  // certainty range
  char_32 *cube_best_32 = cube_alt_list->Alt(0);
  double cube_prob = CubeUtils::Cost2Prob(cube_alt_list->AltCost(0));
  float cube_certainty = convert_prob_to_tess_certainty(cube_prob);
  std::string cube_best_str;
  CubeUtils::UTF32ToUTF8(cube_best_32, &cube_best_str);

  // Retrieve Cube's character bounding boxes and CharSamples,
  // corresponding to the most recent call to RecognizeWord().
  Boxa *char_boxes = NULL;
  CharSamp **char_samples = NULL;;
  int num_chars;
  if (!extract_cube_state(cube_obj, &num_chars, &char_boxes, &char_samples)
      && cube_debug_level > 0) {
    tprintf("Cube WARNING (Tesseract::cube_recognize): Cannot extract "
            "cube state.\n");
    word->SetupFake(unicharset);
    return false;
  }

  // Convert cube's character bounding boxes to a BoxWord.
  BoxWord cube_box_word;
  TBOX tess_word_box = word->word->bounding_box();
  if (word->denorm.block() != NULL)
    tess_word_box.rotate(word->denorm.block()->re_rotation());
  bool box_word_success = create_cube_box_word(char_boxes, num_chars,
                                               tess_word_box,
                                               &cube_box_word);
  boxaDestroy(&char_boxes);
  if (!box_word_success) {
    if (cube_debug_level > 0) {
      tprintf("Cube WARNING (Tesseract::cube_recognize): Could not "
              "create cube BoxWord\n");
    }
    word->SetupFake(unicharset);
    return false;
  }

  // Create cube's best choice.
  WERD_CHOICE* cube_werd_choice = create_werd_choice(
      char_samples, num_chars, cube_best_str.c_str(), cube_certainty,
      unicharset, cube_cntxt_->CharacterSet());
  delete []char_samples;

  if (!cube_werd_choice) {
    if (cube_debug_level > 0) {
      tprintf("Cube WARNING (Tesseract::cube_recognize): Could not "
              "create cube WERD_CHOICE\n");
    }
    word->SetupFake(unicharset);
    return false;
  }
  if (cube_debug_level || classify_debug_level) {
    tprintf("Cube result: %s r=%g, c=%g\n",
            cube_werd_choice->unichar_string().string(),
            cube_werd_choice->rating(),
            cube_werd_choice->certainty());
  }

  // Fill tesseract result's fields with cube results
  fill_werd_res(cube_box_word, cube_werd_choice, cube_best_str.c_str(), word);
  return true;
}