Пример #1
0
/**********************************************************************
 * cube_recognize
 *
 * Call cube on the current word, and write the result to word.
 * Sets up a fake result and returns false if something goes wrong.
 **********************************************************************/
bool Tesseract::cube_recognize(CubeObject *cube_obj, BLOCK* block,
                               WERD_RES *word) {
  // Run cube
  WordAltList *cube_alt_list = cube_obj->RecognizeWord();
  if (!cube_alt_list || cube_alt_list->AltCount() <= 0) {
    if (cube_debug_level > 0) {
      tprintf("Cube returned nothing for word at:");
      word->word->bounding_box().print();
    }
    word->SetupFake(unicharset);
    return false;
  }

  // Get cube's best result and its probability, mapped to tesseract's
  // certainty range
  char_32 *cube_best_32 = cube_alt_list->Alt(0);
  double cube_prob = CubeUtils::Cost2Prob(cube_alt_list->AltCost(0));
  float cube_certainty = convert_prob_to_tess_certainty(cube_prob);
  string cube_best_str;
  CubeUtils::UTF32ToUTF8(cube_best_32, &cube_best_str);

  // Retrieve Cube's character bounding boxes and CharSamples,
  // corresponding to the most recent call to RecognizeWord().
  Boxa *char_boxes = NULL;
  CharSamp **char_samples = NULL;;
  int num_chars;
  if (!extract_cube_state(cube_obj, &num_chars, &char_boxes, &char_samples)
      && cube_debug_level > 0) {
    tprintf("Cube WARNING (Tesseract::cube_recognize): Cannot extract "
            "cube state.\n");
    word->SetupFake(unicharset);
    return false;
  }

  // Convert cube's character bounding boxes to a BoxWord.
  BoxWord cube_box_word;
  TBOX tess_word_box = word->word->bounding_box();
  if (word->denorm.block() != NULL)
    tess_word_box.rotate(word->denorm.block()->re_rotation());
  bool box_word_success = create_cube_box_word(char_boxes, num_chars,
                                               tess_word_box,
                                               &cube_box_word);
  boxaDestroy(&char_boxes);
  if (!box_word_success) {
    if (cube_debug_level > 0) {
      tprintf("Cube WARNING (Tesseract::cube_recognize): Could not "
              "create cube BoxWord\n");
    }
    word->SetupFake(unicharset);
    return false;
  }

  // Fill tesseract result's fields with cube results
  fill_werd_res(cube_box_word, cube_best_str.c_str(), word);

  // Create cube's best choice.
  BLOB_CHOICE** choices = new BLOB_CHOICE*[num_chars];
  for (int i = 0; i < num_chars; ++i) {
    UNICHAR_ID uch_id =
        cube_cntxt_->CharacterSet()->UnicharID(char_samples[i]->StrLabel());
    choices[i] = new BLOB_CHOICE(uch_id, -cube_certainty, cube_certainty,
                                 -1, -1, 0, 0, 0, 0, BCC_STATIC_CLASSIFIER);
  }
  word->FakeClassifyWord(num_chars, choices);
  // within a word, cube recognizes the word in reading order.
  word->best_choice->set_unichars_in_script_order(true);
  delete [] choices;
  delete [] char_samples;

  // Some sanity checks
  ASSERT_HOST(word->best_choice->length() == word->reject_map.length());

  if (cube_debug_level || classify_debug_level) {
    tprintf("Cube result: %s r=%g, c=%g\n",
            word->best_choice->unichar_string().string(),
            word->best_choice->rating(),
            word->best_choice->certainty());
  }
  return true;
}
Пример #2
0
/**********************************************************************
 * cube_recognize
 *
 * Call cube on the current word, and write the result to word.
 * Sets up a fake result and returns false if something goes wrong.
 **********************************************************************/
bool Tesseract::cube_recognize(CubeObject *cube_obj, BLOCK* block,
                               WERD_RES *word) {
  if (!word->SetupForCubeRecognition(unicharset, this, block)) {
    return false;  // Graphics block.
  }

  // Run cube
  WordAltList *cube_alt_list = cube_obj->RecognizeWord();
  if (!cube_alt_list || cube_alt_list->AltCount() <= 0) {
    if (cube_debug_level > 0) {
      tprintf("Cube returned nothing for word at:");
      word->word->bounding_box().print();
    }
    word->SetupFake(unicharset);
    return false;
  }

  // Get cube's best result and its probability, mapped to tesseract's
  // certainty range
  char_32 *cube_best_32 = cube_alt_list->Alt(0);
  double cube_prob = CubeUtils::Cost2Prob(cube_alt_list->AltCost(0));
  float cube_certainty = convert_prob_to_tess_certainty(cube_prob);
  std::string cube_best_str;
  CubeUtils::UTF32ToUTF8(cube_best_32, &cube_best_str);

  // Retrieve Cube's character bounding boxes and CharSamples,
  // corresponding to the most recent call to RecognizeWord().
  Boxa *char_boxes = NULL;
  CharSamp **char_samples = NULL;;
  int num_chars;
  if (!extract_cube_state(cube_obj, &num_chars, &char_boxes, &char_samples)
      && cube_debug_level > 0) {
    tprintf("Cube WARNING (Tesseract::cube_recognize): Cannot extract "
            "cube state.\n");
    word->SetupFake(unicharset);
    return false;
  }

  // Convert cube's character bounding boxes to a BoxWord.
  BoxWord cube_box_word;
  TBOX tess_word_box = word->word->bounding_box();
  if (word->denorm.block() != NULL)
    tess_word_box.rotate(word->denorm.block()->re_rotation());
  bool box_word_success = create_cube_box_word(char_boxes, num_chars,
                                               tess_word_box,
                                               &cube_box_word);
  boxaDestroy(&char_boxes);
  if (!box_word_success) {
    if (cube_debug_level > 0) {
      tprintf("Cube WARNING (Tesseract::cube_recognize): Could not "
              "create cube BoxWord\n");
    }
    word->SetupFake(unicharset);
    return false;
  }

  // Create cube's best choice.
  WERD_CHOICE* cube_werd_choice = create_werd_choice(
      char_samples, num_chars, cube_best_str.c_str(), cube_certainty,
      unicharset, cube_cntxt_->CharacterSet());
  delete []char_samples;

  if (!cube_werd_choice) {
    if (cube_debug_level > 0) {
      tprintf("Cube WARNING (Tesseract::cube_recognize): Could not "
              "create cube WERD_CHOICE\n");
    }
    word->SetupFake(unicharset);
    return false;
  }
  if (cube_debug_level || classify_debug_level) {
    tprintf("Cube result: %s r=%g, c=%g\n",
            cube_werd_choice->unichar_string().string(),
            cube_werd_choice->rating(),
            cube_werd_choice->certainty());
  }

  // Fill tesseract result's fields with cube results
  fill_werd_res(cube_box_word, cube_werd_choice, cube_best_str.c_str(), word);
  return true;
}