コード例 #1
0
// Build a polygonal PBLOB from a C_BLOB and rotate every point of it.
//   blob      source blob, handed to the PBLOB constructor
//   xheight   handed to the PBLOB constructor (for the poly approximation)
//   rotation  vector that each point's pos and vec are rotated by
// Returns the newly allocated, rotated PBLOB.
PBLOB *rotate_cblob(C_BLOB *blob, float xheight, FCOORD rotation) {
  PBLOB *rotated = new PBLOB (blob, xheight);

  OUTLINE_IT outline_it;
  outline_it.set_to_list (rotated->out_list ());
  outline_it.mark_cycle_pt ();
  while (!outline_it.cycled_list ()) {
    // Visit every polygon point of the current outline.
    POLYPT_IT point_it;
    point_it.set_to_list (outline_it.data ()->polypts ());
    point_it.mark_cycle_pt ();
    while (!point_it.cycled_list ()) {
      POLYPT *point = point_it.data ();
      point->pos.rotate (rotation);
      point->vec.rotate (rotation);
      point_it.forward ();
    }
    // The points have moved, so compute_bb () is run again on the outline.
    outline_it.data ()->compute_bb ();
    outline_it.forward ();
  }
  return rotated;
}
コード例 #2
0
// Return a rotated copy of a PBLOB; the copy is made by assigning *blob
// to a freshly allocated PBLOB (a deep copy, per the original author).
//   blob      blob to copy
//   rotation  vector that each point's pos and vec are rotated by
// Returns the newly allocated, rotated PBLOB.
PBLOB *rotate_blob(PBLOB *blob, FCOORD rotation) {
  PBLOB *rotated = new PBLOB;
  *rotated = *blob;

  OUTLINE_IT outline_it;
  outline_it.set_to_list (rotated->out_list ());
  outline_it.mark_cycle_pt ();
  while (!outline_it.cycled_list ()) {
    // Visit every polygon point of the current outline.
    POLYPT_IT point_it;
    point_it.set_to_list (outline_it.data ()->polypts ());
    point_it.mark_cycle_pt ();
    while (!point_it.cycled_list ()) {
      POLYPT *point = point_it.data ();
      point->pos.rotate (rotation);
      point->vec.rotate (rotation);
      point_it.forward ();
    }
    // The points have moved, so compute_bb () is run again on the outline.
    outline_it.data ()->compute_bb ();
    outline_it.forward ();
  }
  return rotated;
}
コード例 #3
0
ファイル: baseapi.cpp プロジェクト: GaryShearer/BasicOCR
// Extract the OCR results, costs (penalty points for uncertainty),
// and the bounding boxes of the characters.
//
// For every word in page_res, one TESS_CHAR per classification unit is
// appended after the current position of `out` (words are separated with
// add_space()). Each TESS_CHAR gets the cost derived from the rating of the
// whole word and the bounding box of the matching blob, rescaled from the
// normalized word box to the box of the original word.
//   out       iterator the TESS_CHARs are added through
//   page_res  recognition results to walk
static void extract_result(ELIST_ITERATOR *out,
                           PAGE_RES* page_res) {
  PAGE_RES_IT page_res_it(page_res);
  int word_count = 0;
  while (page_res_it.word() != NULL) {
    WERD_RES *word = page_res_it.word();
    // `len` holds one byte per classification unit; each byte is the number
    // of bytes of `str` that belong to that unit.
    const char *str = word->best_choice->string().string();
    const char *len = word->best_choice->lengths().string();

    if (word_count)
      add_space(out);
    TBOX bln_rect;  // union of all blob boxes of the normalized word
    PBLOB_LIST *blobs = word->outword->blob_list();
    PBLOB_IT it(blobs);
    int n = strlen(len);
    // Remember where each character's box lives so it can be rescaled once
    // the box of the whole normalized word is known.
    TBOX** boxes_to_fix = new TBOX*[n];
    for (int i = 0; i < n; i++) {
      PBLOB *blob = it.data();
      TBOX current = blob->bounding_box();
      bln_rect = bln_rect.bounding_union(current);
      TESS_CHAR *tc = new TESS_CHAR(rating_to_cost(word->best_choice->rating()),
                                    str, *len);
      tc->box = current;
      boxes_to_fix[i] = &tc->box;

      out->add_after_then_move(tc);
      it.forward();
      str += *len;
      len++;
    }

    // Find the word bbox before normalization.
    // Here we can't use the C_BLOB bboxes directly,
    // since connected letters are not yet cut.
    TBOX real_rect = word->word->bounding_box();

    // Denormalize boxes by transforming the bbox of the whole bln word
    // into the denorm bbox (`real_rect') of the whole word.
    // A bln box without extent would make the division yield inf/NaN, and
    // converting that to int below is undefined, so fall back to a stretch
    // of 1 in that direction.
    double x_stretch = 1;
    double y_stretch = 1;
    if (bln_rect.width() > 0)
      x_stretch = double(real_rect.width()) / bln_rect.width();
    if (bln_rect.height() > 0)
      y_stretch = double(real_rect.height()) / bln_rect.height();
    for (int j = 0; j < n; j++) {
      TBOX *box = boxes_to_fix[j];
      // + 0.5 rounds to the nearest integer coordinate.
      int x0 = int(real_rect.left() +
                   x_stretch * (box->left() - bln_rect.left()) + 0.5);
      int x1 = int(real_rect.left() +
                   x_stretch * (box->right() - bln_rect.left()) + 0.5);
      int y0 = int(real_rect.bottom() +
                   y_stretch * (box->bottom() - bln_rect.bottom()) + 0.5);
      int y1 = int(real_rect.bottom() +
                   y_stretch * (box->top() - bln_rect.bottom()) + 0.5);
      *box = TBOX(ICOORD(x0, y0), ICOORD(x1, y1));
    }
    delete [] boxes_to_fix;

    page_res_it.forward();
    word_count++;
  }
}
コード例 #4
0
ファイル: baseapi.cpp プロジェクト: GaryShearer/BasicOCR
// Write the box-file text for one recognized word into word_str: for every
// blob of the output word, the characters of its classification unit
// followed by " left bottom right top\n".
//   word      recognition result for the word
//   row       not used by this function
//   left      x offset added to every emitted left/right coordinate
//   bottom    y offset added to every emitted bottom/top coordinate
//   word_str  destination buffer; no size is passed in, so the caller must
//             supply one large enough for all the lines
// Returns the number of characters written (the terminating NUL left by
// sprintf is not counted). Nothing is written when the word has no blobs.
static int ConvertWordToBoxText(WERD_RES *word,
                                ROW_RES* row,
                                int left,
                                int bottom,
                                char* word_str) {
  // Denormalize a copy of the output word back to image coordinates.
  WERD denormed_word;
  denormed_word = *(word->outword);
  denormed_word.baseline_denormalise(&word->denorm);

  PBLOB_IT blob_it;
  blob_it.set_to_list(denormed_word.blob_list());
  const int blob_count = denormed_word.blob_list()->length();

  int written = 0;
  int offset = 0;  // start of the current unit in best_choice->string()
  for (int index = 0; index < blob_count; ++index) {
    // Blob box clipped to the page image.
    TBOX blob_box = blob_it.data()->bounding_box();
    int box_left = MAX(blob_box.left(), 0);
    int box_right = MIN(blob_box.right(), page_image.get_xsize());
    int box_bottom = MAX(blob_box.bottom(), 0);
    int box_top = MIN(blob_box.top(), page_image.get_ysize());

    const bool degenerate = box_left >= box_right || box_bottom >= box_top;
    if (word->tess_failed || degenerate) {
      // Bounding boxes can be illegal when tess fails on a word, so clamp
      // the box to the box of the original word instead.
      TBOX word_box = word->word->bounding_box();
      if (box_left < word_box.left())
        box_left = word_box.left();
      if (box_right > word_box.right())
        box_right = word_box.right();
      if (box_bottom < word_box.bottom())
        box_bottom = word_box.bottom();
      if (box_top > word_box.top())
        box_top = word_box.top();
      tprintf("Using substitute bounding box at (%d,%d)->(%d,%d)\n",
              box_left, box_bottom, box_right, box_top);
    }

    // A single classification unit can be composed of several UTF-8
    // characters; copy all of them. Tesseract uses space for recognition
    // failure, which is replaced by the reject character '~' so that no
    // illegal box file is produced.
    const int unit_len = word->best_choice->lengths()[index];
    for (int sub = 0; sub < unit_len; ++sub) {
      const char ch = word->best_choice->string()[offset + sub];
      word_str[written++] = (ch == ' ') ? '~' : ch;
    }

    // Append the coordinates and advance past what sprintf produced.
    char *line_tail = word_str + written;
    sprintf(line_tail, " %d %d %d %d\n",
            box_left + left, box_bottom + bottom,
            box_right + left, box_top + bottom);
    written += strlen(line_tail);

    offset += unit_len;
    blob_it.forward();
  }
  return written;
}
コード例 #5
0
ファイル: tfacepp.cpp プロジェクト: chanchai/botker
// Split a word at its widest gap between neighbouring blobs, recognize the
// two parts separately with recog_word_recursive, and merge the two results.
//
// The returned choice, raw_choice and outword are those produced for the
// first part, with the second part's results appended (operator+= on the
// choices, join_on for the output word). All temporaries created here (both
// part words and the second part's choice, raw choice and output word) are
// deleted before returning.
//
// NOTE(review): with fewer than two blobs the search loop never runs and
// new_blob_it still refers to the empty new_blobs list when it is passed to
// assign_to_sublist; presumably callers only split multi-blob words - confirm.
WERD_CHOICE *split_and_recog_word(                           //recog one word
                                  WERD *word,                //word to do
                                  DENORM *denorm,            //de-normaliser
                                  POLY_MATCHER matcher,      //matcher function
                                  POLY_TESTER tester,        //tester function
                                  POLY_TESTER trainer,       //trainer function
                                  BOOL8 testing,             //true if answer driven
                                  WERD_CHOICE *&raw_choice,  //raw result (output)
                                  BLOB_CHOICE_LIST_CLIST *blob_choices,  //list of blob lists
                                  WERD *&outword             //bln word output
                                 ) {
  // Disabled length bookkeeping for the consistency check at the end:
  //   inT32                                                      outword1_len;
  //   inT32                                                      outword2_len;
  WERD *first_word;              //poly copy of word
  WERD *second_word;             //fabricated word
  WERD *outword2;                //2nd output word
  PBLOB *blob;
  WERD_CHOICE *result;           //return value
  WERD_CHOICE *result2;          //output of 2nd word
  WERD_CHOICE *raw_choice2;      //raw version of 2nd
  float gap;                     //blob gap
  float bestgap;                 //biggest gap
  PBLOB_LIST new_blobs;          //list of gathered blobs
  PBLOB_IT blob_it;
                                 //iterator that marks the split position
  PBLOB_IT new_blob_it = &new_blobs;

  first_word = word->poly_copy (denorm->row ()->x_height ());
  blob_it.set_to_list (first_word->blob_list ());
  bestgap = -MAX_INT32;
                                 //scan all neighbouring pairs of blobs
  while (!blob_it.at_last ()) {
    blob = blob_it.data ();
                                 //gap to next
    gap = blob_it.data_relative (1)->bounding_box ().left () - blob->bounding_box ().right ();
    blob_it.forward ();
                                 //strict > keeps the first of equal gaps
    if (gap > bestgap) {
      bestgap = gap;             //find biggest
      new_blob_it = blob_it;     //save position (blob right of the gap)
    }
  }
                                 //take 2nd half: from the saved position to
                                 //the last blob, where blob_it now stands
  new_blobs.assign_to_sublist (&new_blob_it, &blob_it);
                                 //make it a word
  second_word = new WERD (&new_blobs, 1, NULL);
                                 //no blob may be lost or duplicated
  ASSERT_HOST (word->blob_list ()->length () ==
    first_word->blob_list ()->length () +
    second_word->blob_list ()->length ());

                                 //first part writes the caller's outputs
  result = recog_word_recursive (first_word, denorm, matcher,
    tester, trainer, testing, raw_choice,
    blob_choices, outword);
  delete first_word;             //done that one
                                 //second part writes local outputs;
                                 //raw_choice2 and outword2 are presumably
                                 //set by the call - they are not
                                 //initialized here
  result2 = recog_word_recursive (second_word, denorm, matcher,
    tester, trainer, testing, raw_choice2,
    blob_choices, outword2);
  delete second_word;            //done that too
  *result += *result2;           //combine ratings
  delete result2;
  *raw_choice += *raw_choice2;
  delete raw_choice2;            //finished with it
  //   outword1_len= outword->blob_list()->length();
  //   outword2_len= outword2->blob_list()->length();
  outword->join_on (outword2);   //join words
  delete outword2;
  // Disabled check that the joined word holds the blobs of both parts:
  //   if ( outword->blob_list()->length() != outword1_len + outword2_len )
  //      tprintf( "Split&Recog: part1len=%d; part2len=%d; combinedlen=%d\n",
  //                                outword1_len, outword2_len, outword->blob_list()->length() );
  //   ASSERT_HOST( outword->blob_list()->length() == outword1_len + outword2_len );
  return result;
}