Пример #1
0
void BLOCK::compress() {  // squash it up
  #define           ROW_SPACING 5

  ROW_IT row_it(&rows);
  ROW *row;
  ICOORD row_spacing (0, ROW_SPACING);

  ICOORDELT_IT icoordelt_it;

  sort_rows();

  box = TBOX (box.topleft (), box.topleft ());
  box.move_bottom_edge (ROW_SPACING);
  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
    row = row_it.data ();
    row->move (box.botleft () - row_spacing -
      row->bounding_box ().topleft ());
    box += row->bounding_box ();
  }

  leftside.clear ();
  icoordelt_it.set_to_list (&leftside);
  icoordelt_it.add_to_end (new ICOORDELT (box.left (), box.bottom ()));
  icoordelt_it.add_to_end (new ICOORDELT (box.left (), box.top ()));
  rightside.clear ();
  icoordelt_it.set_to_list (&rightside);
  icoordelt_it.add_to_end (new ICOORDELT (box.right (), box.bottom ()));
  icoordelt_it.add_to_end (new ICOORDELT (box.right (), box.top ()));
}
Пример #2
0
/// Builds a PAGE_RES from the block_list in the way required for ApplyBoxes:
/// All fuzzy spaces are removed, and all the words are maximally chopped.
PAGE_RES* Tesseract::SetupApplyBoxes(const GenericVector<TBOX>& boxes,
                                     BLOCK_LIST *block_list) {
  PreenXHeights(block_list);
  // Strip all fuzzy space markers to simplify the PAGE_RES.
  BLOCK_IT b_it(block_list);
  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
    BLOCK* block = b_it.data();
    ROW_IT r_it(block->row_list());
    for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward ()) {
      ROW* row = r_it.data();
      WERD_IT w_it(row->word_list());
      for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
        WERD* word = w_it.data();
        if (word->cblob_list()->empty()) {
          delete w_it.extract();
        } else {
          word->set_flag(W_FUZZY_SP, false);
          word->set_flag(W_FUZZY_NON, false);
        }
      }
    }
  }
  PAGE_RES* page_res = new PAGE_RES(false, block_list, NULL);
  PAGE_RES_IT pr_it(page_res);
  WERD_RES* word_res;
  while ((word_res = pr_it.word()) != NULL) {
    MaximallyChopWord(boxes, pr_it.block()->block,
                      pr_it.row()->row, word_res);
    pr_it.forward();
  }
  return page_res;
}
Пример #3
0
void LRURowStorage<ROW, V>::insert_inactive(int32_t row_id,
    const ROW<V>& new_row) {
  // Sanity check.
  int num_rows = inactive_list_.size() + active_list_.size();
  CHECK_LE(num_rows, this->capacity_)
    << "Number of rows exceed storage size.";

  // If necessary, make space...
  if (num_rows == this->capacity_) {
    // ...by purging the least-recently-used element (head of list).
    VLOG(1) << "Evicting row " << inactive_list_.right.begin()->second
      << " to insert row " << row_id;
    inactive_list_.right.erase(inactive_list_.right.begin());
  }

  // Create a new record from the key and the value
  // bimap's list_view defaults to inserting this at
  // the list tail (considered most-recently-used).
  int threshold_count = static_cast<int>(num_row_access_to_active_ *
    new_row.get_num_columns());  // Compute the threshold count.
  AccessStatsRow new_inactive_row = boost::tuples::make_tuple(new_row, 0,
      threshold_count);
  inactive_list_.insert(
    typename AccessCountMapList::value_type(row_id, new_inactive_row));
  VLOG(1) << "Inserted new row (row id = " << row_id << ") of size "
    << new_row.get_num_columns() << " with threshold count = "
    << threshold_count;
}
Пример #4
0
void PrintSegmentationStats(BLOCK_LIST* block_list) {
  int num_blocks = 0;
  int num_rows = 0;
  int num_words = 0;
  int num_blobs = 0;
  BLOCK_IT block_it(block_list);
  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
    BLOCK* block = block_it.data();
    ++num_blocks;
    ROW_IT row_it(block->row_list());
    for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
      ++num_rows;
      ROW* row = row_it.data();
      // Iterate over all werds in the row.
      WERD_IT werd_it(row->word_list());
      for (werd_it.mark_cycle_pt(); !werd_it.cycled_list(); werd_it.forward()) {
        WERD* werd = werd_it.data();
        ++num_words;
        num_blobs += werd->cblob_list()->length();
      }
    }
  }
  tprintf("Block list stats:\nBlocks = %d\nRows = %d\nWords = %d\nBlobs = %d\n",
          num_blocks, num_rows, num_words, num_blobs);
}
Пример #5
0
void apply_box_training(BLOCK_LIST *block_list) {
  BLOCK_IT block_it(block_list);
  ROW_IT row_it;
  ROW *row;
  WERD_IT word_it;
  WERD *word;
  WERD *bln_word;
  WERD copy_outword;             // copy to denorm
  PBLOB_IT blob_it;
  DENORM denorm;
  INT16 count = 0;
  char ch[2];

  ch[1] = '\0';

  tprintf ("Generating training data\n");
  for (block_it.mark_cycle_pt ();
  !block_it.cycled_list (); block_it.forward ()) {
    row_it.set_to_list (block_it.data ()->row_list ());
    for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
      row = row_it.data ();
      word_it.set_to_list (row->word_list ());
      for (word_it.mark_cycle_pt ();
      !word_it.cycled_list (); word_it.forward ()) {
        word = word_it.data ();
        if ((strlen (word->text ()) == 1) &&
        (word->gblob_list ()->length () == 1)) {
          /* Here is a word with a single char label and a single blob so train on it */
          bln_word =
            make_bln_copy (word, row, row->x_height (), &denorm);
          blob_it.set_to_list (bln_word->blob_list ());
          ch[0] = *word->text ();
          tess_training_tester (blob_it.data (),
                                 //single blob
            &denorm, TRUE,       //correct
            ch,                  //correct ASCII char
            1,                   //ASCII length
            NULL);
          copy_outword = *(bln_word);
          copy_outword.baseline_denormalise (&denorm);
          blob_it.set_to_list (copy_outword.blob_list ());
          ch[0] = *word->text ();
          delete bln_word;
          count++;
        }
      }
    }
  }
  tprintf ("Generated training data for %d blobs\n", count);
}
Пример #6
0
void TABLE::tCut()

  {
  if(TABLE_ATTRIBUTES::AutoFill())
    return;

  DestroyAllEditControls();
  Copy();

  RUNTIMEINFO*  rtime = (RUNTIMEINFO*)AObjGetRuntimeInfo(theObject);
  ROW*          row = rtime->GetRow();
  row->DeleteSelectedRows();
  PostObjectChanged(CHANGED_NUMROWS);
  }
Пример #7
0
void TABLE::DestroyEditControl()

  {
  if(hEdit)
    {
    // Get the rectangle of the cell for intelligent update
    RECT  cellRect;
    GetWindowRect(hEdit, &cellRect);
    ScreenToClient(hTable, (POINT*)&cellRect);
    ScreenToClient(hTable, (POINT*)&cellRect.right);
    cellRect.left -= COLUMN::CellCharWidth(editCol);
    cellRect.top = cellRect.bottom - TABLE_ATTRIBUTES::CellHeight();
    cellRect.right += COLUMN::CellCharWidth(editCol);

    // un-subclass the control in preparation for destroy
    RemoveProp(hEdit, "loPtr");
    RemoveProp(hEdit, "hiPtr");
    RemoveProp(hEdit, "number");
    SetWindowLong(hEdit, GWL_WNDPROC, (long)oldEditControlProc);

    // get the text and update the cell as necessary
    WORD  len = (WORD)SendMessage(hEdit, WM_GETTEXTLENGTH, 0, 0L)+1;
    char* buffer = new char[len];
    SendMessage(hEdit, WM_GETTEXT, (WPARAM)(len+1), (LPARAM)buffer);
    RUNTIMEINFO*  rtime = (RUNTIMEINFO*)AObjGetRuntimeInfo(theObject);
    ROW*          row = rtime->GetRow();
    if(hstrcmp(row->GetCell(editRow, editCol, FALSE, NULL), buffer))
      {
      row->SetCell(editRow, editCol, buffer);
      TABLE_ATTRIBUTES::SetEditRow(editRow);
      TABLE_ATTRIBUTES::SetEditColumn(editCol);
      PostObjectChanged(CHANGED_FULLREDRAW);
      AEvtPostSignalAtMark(theObject, SIGNAL_TABLEEDITED);
      }
    delete buffer;

    // destroy the window
    DestroyWindow(hEdit);
    hEdit = NULL;
    DeleteObject(hEditFont);

    AEvtPostSignalAtMark(theObject, SIGNAL_STOPEDITING);

    // make sure the proper region is redrawn
    InvalidateRect(hTable, &cellRect, FALSE);
    UpdateWindow(hTable);

    // wipPreventThieves(ParentID, FALSE); SKJ need replacement
    }
  }
Пример #8
0
void Textord::cleanup_blocks(bool clean_noise, BLOCK_LIST* blocks) {
  BLOCK_IT block_it = blocks;    //iterator
  ROW_IT row_it;                 //row iterator

  int num_rows = 0;
  int num_rows_all = 0;
  int num_blocks = 0;
  int num_blocks_all = 0;
  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
       block_it.forward()) {
    BLOCK* block = block_it.data();
    if (block->pdblk.poly_block() != nullptr && !block->pdblk.poly_block()->IsText()) {
      cleanup_nontext_block(block);
      continue;
    }
    num_rows = 0;
    num_rows_all = 0;
    if (clean_noise) {
      row_it.set_to_list(block->row_list());
      for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
        ROW* row = row_it.data();
        ++num_rows_all;
        clean_small_noise_from_words(row);
        if ((textord_noise_rejrows && !row->word_list()->empty() &&
             clean_noise_from_row(row)) ||
            row->word_list()->empty()) {
          delete row_it.extract();  // lose empty row.
        } else {
          if (textord_noise_rejwords)
            clean_noise_from_words(row_it.data());
          if (textord_blshift_maxshift >= 0)
            tweak_row_baseline(row, textord_blshift_maxshift,
                               textord_blshift_xfraction);
          ++num_rows;
        }
      }
    }
    if (block->row_list()->empty()) {
      delete block_it.extract();  // Lose empty text blocks.
    } else {
      ++num_blocks;
    }
    ++num_blocks_all;
    if (textord_noise_debug)
      tprintf("cleanup_blocks: # rows = %d / %d\n", num_rows, num_rows_all);
  }
  if (textord_noise_debug)
    tprintf("cleanup_blocks: # blocks = %d / %d\n", num_blocks, num_blocks_all);
}
Пример #9
0
void TABLE::tPaste()

  {
  if(TABLE_ATTRIBUTES::AutoFill())
    return;

  BOOL  available = IsClipboardFormatAvailable(CF_TEXT);
  if(!available)
    return;

  DestroyAllEditControls();

  OpenClipboard(hParentWnd);
  HGLOBAL hClipboard = GetClipboardData(CF_TEXT);
  if(!hClipboard)
    {
    CloseClipboard();
    return;
    }

  HGLOBAL hCopy = GlobalAlloc(GHND, GlobalSize(hClipboard));
  if(!hCopy)
    {
    CloseClipboard();
    return;
    }

  char* copy = (char*)GlobalLock(hCopy);
  char* clip = (char*)GlobalLock(hClipboard);
  hmemcpy(copy, clip, GlobalSize(hClipboard));
  GlobalUnlock(hClipboard);
  CloseClipboard();

  RUNTIMEINFO*  rtime = (RUNTIMEINFO*)AObjGetRuntimeInfo(theObject);
  ROW*          row = rtime->GetRow();
  long          first = row->FirstSelection();
  char*         token = (char*)myStrtok((char huge*)copy);
  while(token)
    {
    row->AddRow(first, token, TABLE_ATTRIBUTES::NumColumns(), FALSE);
    token = (char*)myStrtok(NULL);
    first++;
    }
  GlobalUnlock(hCopy);
  GlobalFree(hCopy);

  row->DeleteSelectedRows();
  PostObjectChanged(CHANGED_NUMROWS);
  }
Пример #10
0
void TABLE::tCopy()

  {
  RUNTIMEINFO*  rtime = (RUNTIMEINFO*)AObjGetRuntimeInfo(theObject);
  ROW*          row = rtime->GetRow();
  long          numRows = row->NumSelections();
  long          length = numRows;
  BOOL          dbAccessed = FALSE;

  if(numRows<1 || numRows>row->NumRows())
    return;

  for(long i=0; i<numRows; i++)
    length += row->RowLength(row->GetSelection(i), TRUE, &dbAccessed);

  HGLOBAL hClip = GlobalAlloc(GHND, length);
  if(!hClip)
    {
    if(dbAccessed)
      AEvtPostSignalAtMark(theObject, SIGNAL_TABLEAUTOFILLED);

    return;
    }

  char huge*  clip = (char huge*)GlobalLock(hClip);
  long        sel;
  long        offset=0;
  for(i=0; i<numRows; i++)
    {
    sel = row->GetSelection(i);
    length = row->RowLength(sel, TRUE, &dbAccessed);
    row->GetRow(sel, clip+offset, length, TRUE, TRUE, &dbAccessed);
    if(i!=(numRows-1))
      {
      // give the cr-lf combination
      *(clip+(offset + length - 1)) = '\r';
      *(clip+(offset + length++)) = '\n';
      }
    else
      *(clip+(offset + length - 1)) = 0;

    offset+=length;
    }
  GlobalUnlock(hClip);

  OpenClipboard(hParentWnd);
  EmptyClipboard();
  SetClipboardData(CF_TEXT, hClip);
  CloseClipboard();

  if(dbAccessed)
    AEvtPostSignalAtMark(theObject, SIGNAL_TABLEAUTOFILLED);
  }
Пример #11
0
//----------------------------------------------------------------------
MAT EEMD::eemdf90( ROW input, float noise_amplitude, int num_imfs, int num_ensembles ) {

    // Tanspose the Row vector into a Column vector
    VEC col_input = input.t();
    MAT col_imfs = eemdf90(col_input, noise_amplitude, num_imfs, num_ensembles);
    MAT row_imfs = col_imfs.t();
    
    return row_imfs;

}
Пример #12
0
void RefreshWordBlobsFromNewBlobs(BLOCK_LIST* block_list,
                                  C_BLOB_LIST* new_blobs,
                                  C_BLOB_LIST* not_found_blobs) {
  // Now iterate over all the blobs in the segmentation_block_list_, and just
  // replace the corresponding c-blobs inside the werds.
  BLOCK_IT block_it(block_list);
  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
    BLOCK* block = block_it.data();
    if (block->poly_block() != NULL && !block->poly_block()->IsText())
      continue;  // Don't touch non-text blocks.
    // Iterate over all rows in the block.
    ROW_IT row_it(block->row_list());
    for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
      ROW* row = row_it.data();
      // Iterate over all werds in the row.
      WERD_IT werd_it(row->word_list());
      WERD_LIST new_words;
      WERD_IT new_words_it(&new_words);
      for (werd_it.mark_cycle_pt(); !werd_it.cycled_list(); werd_it.forward()) {
        WERD* werd = werd_it.extract();
        WERD* new_werd = werd->ConstructWerdWithNewBlobs(new_blobs,
                                                         not_found_blobs);
        if (new_werd) {
          // Insert this new werd into the actual row's werd-list. Remove the
          // existing one.
          new_words_it.add_after_then_move(new_werd);
          delete werd;
        } else {
          // Reinsert the older word back, for lack of better options.
          // This is critical since dropping the words messes up segmentation:
          // eg. 1st word in the row might otherwise have W_FUZZY_NON turned on.
          new_words_it.add_after_then_move(werd);
        }
      }
      // Get rid of the old word list & replace it with the new one.
      row->word_list()->clear();
      werd_it.move_to_first();
      werd_it.add_list_after(&new_words);
    }
  }
}
Пример #13
0
/// Any row xheight that is significantly different from the median is set
/// to the median.
void Tesseract::PreenXHeights(BLOCK_LIST *block_list) {
  double median_xheight = MedianXHeight(block_list);
  double max_deviation = kMaxXHeightDeviationFraction * median_xheight;
  // Strip all fuzzy space markers to simplify the PAGE_RES.
  BLOCK_IT b_it(block_list);
  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
    BLOCK* block = b_it.data();
    ROW_IT r_it(block->row_list());
    for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward ()) {
      ROW* row = r_it.data();
      float diff = fabs(row->x_height() - median_xheight);
      if (diff > max_deviation) {
        if (applybox_debug) {
          tprintf("row xheight=%g, but median xheight = %g\n",
                  row->x_height(), median_xheight);
        }
        row->set_x_height(static_cast<float>(median_xheight));
      }
    }
  }
}
Пример #14
0
const FP_LIB_TABLE::ROW* FP_LIB_TABLE::FindRow( const wxString& aNickname )
    throw( IO_ERROR )
{
    ROW* row = findRow( aNickname );

    if( !row )
    {
        wxString msg = wxString::Format(
            _( "fp-lib-table files contain no lib with nickname '%s'" ),
            GetChars( aNickname ) );

        THROW_IO_ERROR( msg );
    }

    // We've been 'lazy' up until now, but it cannot be deferred any longer,
    // instantiate a PLUGIN of the proper kind if it is not already in this ROW.
    if( !row->plugin )
        row->setPlugin( IO_MGR::PluginFind( row->type ) );

    return row;
}
Пример #15
0
/**
 * Returns the baseline of the current object at the given level.
 * The baseline is the line that passes through (x1, y1) and (x2, y2).
 * WARNING: with vertical text, baselines may be vertical!
 */
bool PageIterator::Baseline(PageIteratorLevel level,
                            int* x1, int* y1, int* x2, int* y2) const {
  if (it_->word() == NULL) return false;  // Already at the end!
  ROW* row = it_->row()->row;
  WERD* word = it_->word()->word;
  TBOX box = (level == RIL_WORD || level == RIL_SYMBOL)
           ? word->bounding_box()
           : row->bounding_box();
  int left = box.left();
  ICOORD startpt(left, static_cast<inT16>(row->base_line(left) + 0.5));
  int right = box.right();
  ICOORD endpt(right, static_cast<inT16>(row->base_line(right) + 0.5));
  // Rotate to image coordinates and convert to global image coords.
  startpt.rotate(it_->block()->block->re_rotation());
  endpt.rotate(it_->block()->block->re_rotation());
  *x1 = startpt.x() / scale_ + rect_left_;
  *y1 = (rect_height_ - startpt.y()) / scale_ + rect_top_;
  *x2 = endpt.x() / scale_ + rect_left_;
  *y2 = (rect_height_ - endpt.y()) / scale_ + rect_top_;
  return true;
}
Пример #16
0
void ExtractBlobsFromSegmentation(BLOCK_LIST* blocks,
                                  C_BLOB_LIST* output_blob_list) {
  C_BLOB_IT return_list_it(output_blob_list);
  BLOCK_IT block_it(blocks);
  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
    BLOCK* block = block_it.data();
    ROW_IT row_it(block->row_list());
    for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
      ROW* row = row_it.data();
      // Iterate over all werds in the row.
      WERD_IT werd_it(row->word_list());
      for (werd_it.mark_cycle_pt(); !werd_it.cycled_list(); werd_it.forward()) {
        WERD* werd = werd_it.data();
        return_list_it.move_to_last();
        return_list_it.add_list_after(werd->cblob_list());
        return_list_it.move_to_last();
        return_list_it.add_list_after(werd->rej_cblob_list());
      }
    }
  }
}
Пример #17
0
// page_res is non-const because the iterator doesn't know if you are going
// to change the items it points to! Really a const here though.
void Tesseract::blob_feature_display(PAGE_RES* page_res,
                                     const TBOX& selection_box) {
  ROW* row;               // row of word
  BLOCK* block;           // block of word
  WERD* word = make_pseudo_word(page_res, selection_box, block, row);
  if (word != NULL) {
    WERD_RES word_res(word);
    word_res.x_height = row->x_height();
    word_res.SetupForRecognition(unicharset, this, BestPix(),
                                 tessedit_ocr_engine_mode, NULL,
                                 classify_bln_numeric_mode,
                                 textord_use_cjk_fp_model,
                                 poly_allow_detailed_fx,
                                 row, block);
    TWERD* bln_word = word_res.chopped_word;
    TBLOB* bln_blob = bln_word->blobs[0];
    INT_FX_RESULT_STRUCT fx_info;
    GenericVector<INT_FEATURE_STRUCT> bl_features;
    GenericVector<INT_FEATURE_STRUCT> cn_features;
    Classify::ExtractFeatures(*bln_blob, classify_nonlinear_norm, &bl_features,
                              &cn_features, &fx_info, NULL);
    // Display baseline features.
    ScrollView* bl_win = CreateFeatureSpaceWindow("BL Features", 512, 0);
    ClearFeatureSpaceWindow(baseline, bl_win);
    for (int f = 0; f < bl_features.size(); ++f)
      RenderIntFeature(bl_win, &bl_features[f], ScrollView::GREEN);
    bl_win->Update();
    // Display cn features.
    ScrollView* cn_win = CreateFeatureSpaceWindow("CN Features", 512, 0);
    ClearFeatureSpaceWindow(character, cn_win);
    for (int f = 0; f < cn_features.size(); ++f)
      RenderIntFeature(cn_win, &cn_features[f], ScrollView::GREEN);
    cn_win->Update();

    delete word;
  }
}
Пример #18
0
// Fixes the block so it obeys all the rules:
// Must have at least one ROW.
// Must have at least one WERD.
// WERDs contain a fake blob.
void Textord::cleanup_nontext_block(BLOCK* block) {
  // Non-text blocks must contain at least one row.
  ROW_IT row_it(block->row_list());
  if (row_it.empty()) {
    const TBOX& box = block->pdblk.bounding_box();
    float height = box.height();
    int32_t xstarts[2] = {box.left(), box.right()};
    double coeffs[3] = {0.0, 0.0, static_cast<double>(box.bottom())};
    ROW* row = new ROW(1, xstarts, coeffs, height / 2.0f, height / 4.0f,
                       height / 4.0f, 0, 1);
    row_it.add_after_then_move(row);
  }
  // Each row must contain at least one word.
  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
    ROW* row = row_it.data();
    WERD_IT w_it(row->word_list());
    if (w_it.empty()) {
      // Make a fake blob to put in the word.
      TBOX box = block->row_list()->singleton() ? block->pdblk.bounding_box()
                                                : row->bounding_box();
      C_BLOB* blob = C_BLOB::FakeBlob(box);
      C_BLOB_LIST blobs;
      C_BLOB_IT blob_it(&blobs);
      blob_it.add_after_then_move(blob);
      WERD* word = new WERD(&blobs, 0, nullptr);
      w_it.add_after_then_move(word);
    }
    // Each word must contain a fake blob.
    for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
      WERD* word = w_it.data();
      // Just assert that this is true, as it would be useful to find
      // out why it isn't.
      ASSERT_HOST(!word->cblob_list()->empty());
    }
    row->recalc_bounding_box();
  }
}
Пример #19
0
int LRURowStorage<ROW, V>::PutRow(int32_t row_id,
    const ROW<V>& new_row) {
  // Look for row_id in active_list_.
  const typename MapList::left_iterator ait = active_list_.left.find(row_id);
  if (ait != active_list_.left.end()) {
    // We found row_id in active_list_. Update it.
    ait->second = new_row;

    // Now move it to the end of the list (most recent)
    active_list_.right.relocate(
      active_list_.right.end(),
      active_list_.project_right(ait));
    return 0;   // replace existing row.
  }

  // Look for row_id in inactive_list_.
  const inactive_list_left_iter_t iit = inactive_list_.left.find(row_id);
  if (iit != inactive_list_.left.end()) {
    // We found row_id in inactive_list_.
    AccessStatsRow& row_tuple = iit->second;

    // Update it.
    row_tuple.template get<AccessStatsRowTuple::DATA>() = new_row;
    row_tuple.template get<AccessStatsRowTuple::CURR_COUNT>() +=
      new_row.get_num_columns();
    // Recompute the threshold count.
    row_tuple.template get<AccessStatsRowTuple::THRESHOLD>() =
      static_cast<int>(num_row_access_to_active_ * new_row.get_num_columns());

    PromoteInactiveRowOrRelocate(iit);
    return 0;   // replace existing row.
  }

  // Not found in either list, put it in inactive_list_.
  insert_inactive(row_id, new_row);
  return 1;   // inserting new row.
}
// This method resolves the cc bbox to a particular row and returns the row's
// xheight.
int ShiroRekhaSplitter::GetXheightForCC(Box* cc_bbox) {
  if (!segmentation_block_list_) {
    return global_xheight_;
  }
  // Compute the box coordinates in Tesseract's coordinate system.
  TBOX bbox(cc_bbox->x,
            pixGetHeight(orig_pix_) - cc_bbox->y - cc_bbox->h - 1,
            cc_bbox->x + cc_bbox->w,
            pixGetHeight(orig_pix_) - cc_bbox->y - 1);
  // Iterate over all blocks.
  BLOCK_IT block_it(segmentation_block_list_);
  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
    BLOCK* block = block_it.data();
    // Iterate over all rows in the block.
    ROW_IT row_it(block->row_list());
    for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
      ROW* row = row_it.data();
      if (!row->bounding_box().major_overlap(bbox)) {
        continue;
      }
      // Row could be skewed, warped, etc. Use the position of the box to
      // determine the baseline position of the row for that x-coordinate.
      // Create a square TBOX whose baseline's mid-point lies at this point
      // and side is row's xheight. Take the overlap of this box with the input
      // box and check if it is a 'major overlap'. If so, this box lies in this
      // row. In that case, return the xheight for this row.
      float box_middle = 0.5 * (bbox.left() + bbox.right());
      int baseline = static_cast<int>(row->base_line(box_middle) + 0.5);
      TBOX test_box(box_middle - row->x_height() / 2,
                    baseline,
                    box_middle + row->x_height() / 2,
                    static_cast<int>(baseline + row->x_height()));
      // Compute overlap. If it is is a major overlap, this is the right row.
      if (bbox.major_overlap(test_box)) {
        return row->x_height();
      }
    }
  }
  // No row found for this bbox.
  return kUnspecifiedXheight;
}
Пример #21
0
void TABLE::CreateEditControl()

  {
  if(hEdit || TABLE_ATTRIBUTES::AutoFill() || !TABLE_ATTRIBUTES::CanActivate())
    return;

  long  activeRow = TABLE_ATTRIBUTES::ActiveRow();
  if(activeRow<0)
    return;

  if((activeRow<firstRow) || (activeRow>(firstRow+numVisRows-1)))
    return;

  int   activeCol = TABLE_ATTRIBUTES::ActiveColumn();
  if(!COLUMN::CanEdit(activeCol))
    return;

  RUNTIMEINFO*  rtime = (RUNTIMEINFO*)AObjGetRuntimeInfo(theObject);
  ROW*          row = rtime->GetRow();
  if(!row->IsSelected(activeRow))
    return;

  int width = COLUMN::ColumnWidth(activeCol);
  if(width==0)
    return;

  int height = TABLE_ATTRIBUTES::CellHeight();
  if(TABLE_ATTRIBUTES::RowLines())
    height++;
  int cellY = tblRect.top + ((int)(activeRow - firstRow))*height;
  if(TABLE_ATTRIBUTES::ShowTitles())
    {
    cellY += (TABLE_ATTRIBUTES::TitleHeight()+1);
    if(TABLE_ATTRIBUTES::RowLines())
      cellY++;
    }

  if(TABLE_ATTRIBUTES::RowLines())
    height--;

  int cellX = tblRect.left;
  for(int i=0; i<activeCol; i++)
    {
    if((i>=TABLE_ATTRIBUTES::NumFixedColumns()) && (i<firstColumn))
      continue;

    if(COLUMN::ColumnWidth(i)==0)
      {
      if((i==(TABLE_ATTRIBUTES::NumFixedColumns()-1))
          &&
        (cellX!=tblRect.left))
        cellX++;
      continue;
      }

    cellX += COLUMN::ColumnWidth(i);
    if(TABLE_ATTRIBUTES::ColumnLines())
      cellX++;
    if(i==(TABLE_ATTRIBUTES::NumFixedColumns()-1))
      cellX++;
    }

  HDC       hDC = GetDC(hTable);
  HPEN      hPen = CreatePen(PS_SOLID, 1, GetSysColor(COLOR_WINDOW));
  LOGBRUSH  lb = {BS_SOLID, GetSysColor(COLOR_WINDOW), 0};
  HPEN      oldPen = (HPEN)SelectObject(hDC, hPen);
  HBRUSH    hBrush = CreateBrushIndirect(&lb);
  HBRUSH    oldBrush = (HBRUSH)SelectObject(hDC, hBrush);
  int       right = cellX+width;
  if(right>tblRect.right)
    right = tblRect.right;

  Rectangle(hDC, cellX, cellY, right, cellY+height);
  SelectObject(hDC, oldPen);
  DeleteObject(hPen);
  SelectObject(hDC, oldBrush);
  DeleteObject(hBrush);
  ReleaseDC(hParentWnd, hDC);

  cellX += COLUMN::CellCharWidth(activeCol);
  width -= 2*COLUMN::CellCharWidth(activeCol);
  if((cellX+width)>tblRect.right)
    width = tblRect.right - cellX;

  int offset = 2*TABLE_ATTRIBUTES::CellLeading()/3 + TABLE_ATTRIBUTES::CellAscent() - COLUMN::CellAscent(activeCol);
  cellY = cellY + offset;
  height = TABLE_ATTRIBUTES::CellHeight() - offset;

  editRow = activeRow;
  editCol = activeCol;

  char huge*  cellPtr = row->GetCell(activeRow, activeCol, FALSE, NULL);
  UINT  length = (UINT)hstrlen(cellPtr) + 1;
  char* cellStr = new char[length];
  hmemcpy(cellStr, cellPtr, length);
  hEdit = CreateWindow("EDIT", cellStr,
            WS_CHILD | ES_AUTOHSCROLL,
            cellX, cellY,
            width, height,
            hTable, (HMENU)3, G.hInstance, NULL);
  delete cellStr;

  // subclass the edit control
  SetProp(hEdit, "loPtr", (HANDLE)LOWORD((DWORD)this));
  SetProp(hEdit, "hiPtr", (HANDLE)HIWORD((DWORD)this));
  SetProp(hEdit, "number", (HANDLE)AObjCheckType(COLUMN::GetObjectID(activeCol), OTYPE_NUMBER));
  oldEditControlProc = (WNDPROC)GetWindowLong(hEdit, GWL_WNDPROC);
  SetWindowLong(hEdit, GWL_WNDPROC, (long)EditControlProc);
  ShowWindow(hEdit, SW_SHOW);

  hEditFont =CreateFontIndirect(COLUMN::CellLogFont(activeCol));
  SendMessage(hEdit, WM_SETFONT, (WPARAM)hEditFont, MAKELPARAM(TRUE, 0));
  SetFocus(hEdit);
  SendMessage(hEdit, EM_SETSEL, (WPARAM)FALSE, MAKELPARAM(0, -1));

  // wipPreventThieves(ParentID, TRUE); SKJ --> need replacement
  AEvtPostSignalAtMark(theObject, SIGNAL_STARTEDITING);
  }
Пример #22
0
void FP_LIB_TABLE::Parse( FP_LIB_TABLE_LEXER* in ) throw( IO_ERROR, PARSE_ERROR )
{
    /*
        (fp_lib_table
            (lib (name NICKNAME)(descr DESCRIPTION)(type TYPE)(full_uri FULL_URI)(options OPTIONS))
            :
        )

        Elements after (name) are order independent.
    */

    T       tok;

    // This table may be nested within a larger s-expression, or not.
    // Allow for parser of that optional containing s-epression to have looked ahead.
    if( in->CurTok() != T_fp_lib_table )
    {
        in->NeedLEFT();
        if( ( tok = in->NextTok() ) != T_fp_lib_table )
            in->Expecting( T_fp_lib_table );
    }

    while( ( tok = in->NextTok() ) != T_RIGHT )
    {
        ROW     row;        // reconstructed for each row in input stream.

        if( tok == T_EOF )
            in->Expecting( T_RIGHT );

        if( tok != T_LEFT )
            in->Expecting( T_LEFT );

        // in case there is a "row integrity" error, tell where later.
        int lineNum = in->CurLineNumber();
        int offset  = in->CurOffset();

        if( ( tok = in->NextTok() ) != T_lib )
            in->Expecting( T_lib );

        // (name NICKNAME)
        in->NeedLEFT();

        if( ( tok = in->NextTok() ) != T_name )
            in->Expecting( T_name );

        in->NeedSYMBOLorNUMBER();

        row.SetNickName( in->FromUTF8() );

        in->NeedRIGHT();

        // After (name), remaining (lib) elements are order independent, and in
        // some cases optional.
        bool    sawType = false;
        bool    sawOpts = false;
        bool    sawDesc = false;
        bool    sawUri  = false;

        while( ( tok = in->NextTok() ) != T_RIGHT )
        {
            if( tok == T_EOF )
                in->Unexpected( T_EOF );

            if( tok != T_LEFT )
                in->Expecting( T_LEFT );

            tok = in->NeedSYMBOLorNUMBER();

            switch( tok )
            {
            case T_uri:
                if( sawUri )
                    in->Duplicate( tok );
                sawUri = true;
                in->NeedSYMBOLorNUMBER();
                row.SetFullURI( in->FromUTF8() );
                break;

            case T_type:
                if( sawType )
                    in->Duplicate( tok );
                sawType = true;
                in->NeedSYMBOLorNUMBER();
                row.SetType( in->FromUTF8() );
                break;

            case T_options:
                if( sawOpts )
                    in->Duplicate( tok );
                sawOpts = true;
                in->NeedSYMBOLorNUMBER();
                row.SetOptions( in->FromUTF8() );
                break;

            case T_descr:
                if( sawDesc )
                    in->Duplicate( tok );
                sawDesc = true;
                in->NeedSYMBOLorNUMBER();
                row.SetDescr( in->FromUTF8() );
                break;

            default:
                in->Unexpected( tok );
            }

            in->NeedRIGHT();
        }

        if( !sawType )
            in->Expecting( T_type );

        if( !sawUri )
            in->Expecting( T_uri );

        // all nickNames within this table fragment must be unique, so we do not
        // use doReplace in InsertRow().  (However a fallBack table can have a
        // conflicting nickName and ours will supercede that one since in
        // FindLib() we search this table before any fall back.)
        if( !InsertRow( row ) )
        {
            wxString msg = wxString::Format(
                                _( "'%s' is a duplicate footprint library nickName" ),
                                GetChars( row.nickName ) );
            THROW_PARSE_ERROR( msg, in->CurSource(), in->CurLine(), lineNum, offset );
        }
    }
}
Пример #23
0
void apply_box_testing(BLOCK_LIST *block_list) {
  BLOCK_IT block_it(block_list);
  ROW_IT row_it;
  ROW *row;
  INT16 row_count = 0;
  WERD_IT word_it;
  WERD *word;
  WERD *bln_word;
  INT16 word_count = 0;
  PBLOB_IT blob_it;
  DENORM denorm;
  INT16 count = 0;
  char ch[2];
  WERD *outword;                 //bln best choice
  //segmentation
  WERD_CHOICE *best_choice;      //tess output
  WERD_CHOICE *raw_choice;       //top choice permuter
                                 //detailed results
  BLOB_CHOICE_LIST_CLIST blob_choices;
  INT16 char_count = 0;
  INT16 correct_count = 0;
  INT16 err_count = 0;
  INT16 rej_count = 0;
  #ifndef SECURE_NAMES
  WERDSTATS wordstats;           //As from newdiff
  #endif
  char tess_rej_str[3];
  char tess_long_str[3];

  ch[1] = '\0';
  strcpy (tess_rej_str, "|A");
  strcpy (tess_long_str, "|B");

  for (block_it.mark_cycle_pt ();
  !block_it.cycled_list (); block_it.forward ()) {
    row_it.set_to_list (block_it.data ()->row_list ());
    for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
      row = row_it.data ();
      row_count++;
      word_count = 0;
      word_it.set_to_list (row->word_list ());
      for (word_it.mark_cycle_pt ();
      !word_it.cycled_list (); word_it.forward ()) {
        word = word_it.data ();
        word_count++;
        if ((strlen (word->text ()) == 1) &&
          !STRING (applybox_test_exclusions).contains (*word->text ())
        && (word->gblob_list ()->length () == 1)) {
          /* Here is a word with a single char label and a single blob so test it */
          bln_word =
            make_bln_copy (word, row, row->x_height (), &denorm);
          blob_it.set_to_list (bln_word->blob_list ());
          ch[0] = *word->text ();
          char_count++;
          best_choice = tess_segment_pass1 (bln_word,
            &denorm,
            tess_default_matcher,
            raw_choice,
            &blob_choices, outword);

          /*
            Test for TESS screw up on word. Recog_word has already ensured that the
            choice list, outword blob lists and best_choice string are the same
            length. A TESS screw up is indicated by a blank filled or 0 length string.
          */
          if ((best_choice->string ().length () == 0) ||
            (strspn (best_choice->string ().string (), " ") ==
          best_choice->string ().length ())) {
            rej_count++;
            tprintf ("%d:%d: \"%s\" -> TESS FAILED\n",
              row_count, word_count, ch);
            #ifndef SECURE_NAMES
            wordstats.word (tess_rej_str, 2, ch, 1);
            #endif
          }
          else {
            if ((best_choice->string ().length () !=
              outword->blob_list ()->length ()) ||
              (best_choice->string ().length () !=
            blob_choices.length ())) {
              tprintf
                ("ASSERT FAIL String:\"%s\"; Strlen=%d; #Blobs=%d; #Choices=%d\n",
                best_choice->string ().string (),
                best_choice->string ().length (),
                outword->blob_list ()->length (),
                blob_choices.length ());
            }
            ASSERT_HOST (best_choice->string ().length () ==
              outword->blob_list ()->length ());
            ASSERT_HOST (best_choice->string ().length () ==
              blob_choices.length ());
            fix_quotes ((char *) best_choice->string ().string (),
                                 //turn to double
              outword, &blob_choices);
            if (strcmp (best_choice->string ().string (), ch) != 0) {
              err_count++;
              tprintf ("%d:%d: \"%s\" -> \"%s\"\n",
                row_count, word_count, ch,
                best_choice->string ().string ());
            }
            else
              correct_count++;
            #ifndef SECURE_NAMES
            if (best_choice->string ().length () > 2)
              wordstats.word (tess_long_str, 2, ch, 1);
            else
              wordstats.word ((char *) best_choice->string ().
                string (),
                best_choice->string ().length (), ch,
                1);
            #endif
          }
          delete bln_word;
          delete outword;
          delete best_choice;
          delete raw_choice;
          blob_choices.deep_clear ();
          count++;
        }
      }
    }
  }
  #ifndef SECURE_NAMES
  wordstats.print (1, 100.0);
  wordstats.conf_matrix ();
  tprintf ("Tested %d chars: %d correct; %d rejected by tess; %d errs\n",
    char_count, correct_count, rej_count, err_count);
  #endif
}
Пример #24
0
/*************************************************************************
 * tidy_up()
 *   - report >1 block
 *   - sort the words in each row.
 *   - report any rows with no labelled words.
 *   - report any remaining unlabelled words
 *		- report total labelled words
 *
 *************************************************************************/
void tidy_up(                         //
             BLOCK_LIST *block_list,  //real blocks
             INT16 &ok_char_count,
             INT16 &ok_row_count,
             INT16 &unlabelled_words,
             INT16 *tgt_char_counts,
             INT16 &rebalance_count,
             char &min_char,
             INT16 &min_samples,
             INT16 &final_labelled_blob_count) {
  BLOCK_IT block_it(block_list);
  ROW_IT row_it;
  ROW *row;
  WERD_IT word_it;
  WERD *word;
  WERD *duplicate_word;
  INT16 block_idx = 0;
  INT16 row_idx;
  INT16 all_row_idx = 0;
  BOOL8 row_ok;
  BOOL8 rebalance_needed = FALSE;
                                 //No. of unique labelled samples
  INT16 labelled_char_counts[128];
  INT16 i;
  char ch;
  char prev_ch = '\0';
  BOOL8 at_dupe_of_prev_word;
  ROW *prev_row = NULL;
  INT16 left;
  INT16 prev_left = -1;

  for (i = 0; i < 128; i++)
    labelled_char_counts[i] = 0;

  ok_char_count = 0;
  ok_row_count = 0;
  unlabelled_words = 0;
  if ((applybox_debug > 4) && (block_it.length () != 1))

    tprintf ("APPLY_BOXES: More than one block??\n");

  for (block_it.mark_cycle_pt ();
  !block_it.cycled_list (); block_it.forward ()) {
    block_idx++;
    row_idx = 0;
    row_ok = FALSE;
    row_it.set_to_list (block_it.data ()->row_list ());
    for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
      row_idx++;
      all_row_idx++;
      row = row_it.data ();
      word_it.set_to_list (row->word_list ());
      word_it.sort (word_comparator);
      for (word_it.mark_cycle_pt ();
      !word_it.cycled_list (); word_it.forward ()) {
        word = word_it.data ();
        if (strlen (word->text ()) == 0) {
          unlabelled_words++;
          if (applybox_debug > 4) {
            tprintf
              ("APPLY_BOXES: Unlabelled word blk:%d row:%d allrows:%d\n",
              block_idx, row_idx, all_row_idx);
          }
        }
        else {
          if (word->gblob_list ()->length () != 1)
            tprintf
              ("APPLY_BOXES: FATALITY - MULTIBLOB Labelled word blk:%d row:%d allrows:%d\n",
              block_idx, row_idx, all_row_idx);

          ok_char_count++;
          labelled_char_counts[*word->text ()]++;
          row_ok = TRUE;
        }
      }
      if ((applybox_debug > 4) && (!row_ok)) {
        tprintf
          ("APPLY_BOXES: Row with no labelled words blk:%d row:%d allrows:%d\n",
          block_idx, row_idx, all_row_idx);
      }
      else
        ok_row_count++;
    }
  }

  min_samples = 9999;
  for (i = 0; i < 128; i++) {
    if (tgt_char_counts[i] > labelled_char_counts[i]) {
      if (labelled_char_counts[i] <= 1) {
        tprintf
          ("APPLY_BOXES: FATALITY - %d labelled samples of \"%c\" - target is %d\n",
          labelled_char_counts[i], (char) i, tgt_char_counts[i]);
      }
      else {
        rebalance_needed = TRUE;
        if (applybox_debug > 0)
          tprintf
            ("APPLY_BOXES: REBALANCE REQD \"%c\" - target of %d from %d labelled samples\n",
            (char) i, tgt_char_counts[i], labelled_char_counts[i]);
      }
    }
    if ((min_samples > labelled_char_counts[i]) && (tgt_char_counts[i] > 0)) {
      min_samples = labelled_char_counts[i];
      min_char = (char) i;
    }
  }

  while (applybox_rebalance && rebalance_needed) {
    block_it.set_to_list (block_list);
    for (block_it.mark_cycle_pt ();
    !block_it.cycled_list (); block_it.forward ()) {
      row_it.set_to_list (block_it.data ()->row_list ());
      for (row_it.mark_cycle_pt ();
      !row_it.cycled_list (); row_it.forward ()) {
        row = row_it.data ();
        word_it.set_to_list (row->word_list ());
        for (word_it.mark_cycle_pt ();
        !word_it.cycled_list (); word_it.forward ()) {
          word = word_it.data ();
          left = word->bounding_box ().left ();
          ch = *word->text ();
          at_dupe_of_prev_word = ((row == prev_row) &&
            (left = prev_left) &&
            (ch == prev_ch));
          if ((ch != '\0') &&
            (labelled_char_counts[ch] > 1) &&
            (tgt_char_counts[ch] > labelled_char_counts[ch]) &&
          (!at_dupe_of_prev_word)) {
            /* Duplicate the word to rebalance the labelled samples */
            if (applybox_debug > 9) {
              tprintf ("Duping \"%c\" from ", ch);
              word->bounding_box ().print ();
            }
            duplicate_word = new WERD;
            *duplicate_word = *word;
            word_it.add_after_then_move (duplicate_word);
            rebalance_count++;
            labelled_char_counts[ch]++;
          }
          prev_row = row;
          prev_left = left;
          prev_ch = ch;
        }
      }
    }
    rebalance_needed = FALSE;
    for (i = 0; i < 128; i++) {
      if ((tgt_char_counts[i] > labelled_char_counts[i]) &&
      (labelled_char_counts[i] > 1)) {
        rebalance_needed = TRUE;
        break;
      }
    }
  }

  /* Now final check - count labelled blobs */
  final_labelled_blob_count = 0;
  block_it.set_to_list (block_list);
  for (block_it.mark_cycle_pt ();
  !block_it.cycled_list (); block_it.forward ()) {
    row_it.set_to_list (block_it.data ()->row_list ());
    for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
      row = row_it.data ();
      word_it.set_to_list (row->word_list ());
      word_it.sort (word_comparator);
      for (word_it.mark_cycle_pt ();
      !word_it.cycled_list (); word_it.forward ()) {
        word = word_it.data ();
        if ((strlen (word->text ()) == 1) &&
          (word->gblob_list ()->length () == 1))
          final_labelled_blob_count++;
      }
    }
  }
}
Пример #25
0
ROW *find_row_of_box(                         //
                     BLOCK_LIST *block_list,  //real blocks
                     BOX box,                 //from boxfile
                     INT16 &block_id,
                     INT16 &row_id_to_process) {
  BLOCK_IT block_it(block_list);
  BLOCK *block;
  ROW_IT row_it;
  ROW *row;
  ROW *row_to_process = NULL;
  INT16 row_id;
  WERD_IT word_it;
  WERD *word;
  BOOL8 polyg;
  PBLOB_IT blob_it;
  PBLOB *blob;
  OUTLINE_IT outline_it;
  OUTLINE *outline;

  /*
    Find row to process - error if box REALLY overlaps more than one row. (I.e
    it overlaps blobs in the row - not just overlaps the bounding box of the
    whole row.)
  */

  block_id = 0;
  for (block_it.mark_cycle_pt ();
  !block_it.cycled_list (); block_it.forward ()) {
    block_id++;
    row_id = 0;
    block = block_it.data ();
    if (block->bounding_box ().overlap (box)) {
      row_it.set_to_list (block->row_list ());
      for (row_it.mark_cycle_pt ();
      !row_it.cycled_list (); row_it.forward ()) {
        row_id++;
        row = row_it.data ();
        if (row->bounding_box ().overlap (box)) {
          word_it.set_to_list (row->word_list ());
          for (word_it.mark_cycle_pt ();
          !word_it.cycled_list (); word_it.forward ()) {
            word = word_it.data ();
            polyg = word->flag (W_POLYGON);
            if (word->bounding_box ().overlap (box)) {
              blob_it.set_to_list (word->gblob_list ());
              for (blob_it.mark_cycle_pt ();
              !blob_it.cycled_list (); blob_it.forward ()) {
                blob = blob_it.data ();
                if (gblob_bounding_box (blob, polyg).
                overlap (box)) {
                  outline_it.
                    set_to_list (gblob_out_list
                    (blob, polyg));
                  for (outline_it.mark_cycle_pt ();
                    !outline_it.cycled_list ();
                  outline_it.forward ()) {
                    outline = outline_it.data ();
                    if (goutline_bounding_box
                    (outline, polyg).major_overlap (box)) {
                      if ((row_to_process == NULL) ||
                      (row_to_process == row)) {
                        row_to_process = row;
                        row_id_to_process = row_id;
                      }
                      else
                        /* RETURN ERROR Box overlaps blobs in more than one row  */
                        return NULL;
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  }
  return row_to_process;
}
Пример #26
0
// Groups blocks by rotation, then, for each group, makes a WordGrid and calls
// TransferDiacriticsToWords to copy the diacritic blobs to the most
// appropriate words in the group of blocks. Source blobs are not touched.
void Textord::TransferDiacriticsToBlockGroups(BLOBNBOX_LIST* diacritic_blobs,
                                              BLOCK_LIST* blocks) {
  // Angle difference larger than this is too much to consider equal.
  // They should only be in multiples of M_PI/2 anyway.
  const double kMaxAngleDiff = 0.01;  // About 0.6 degrees.
  PointerVector<BlockGroup> groups;
  BLOCK_IT bk_it(blocks);
  for (bk_it.mark_cycle_pt(); !bk_it.cycled_list(); bk_it.forward()) {
    BLOCK* block = bk_it.data();
    if (block->pdblk.poly_block() != nullptr && !block->pdblk.poly_block()->IsText()) {
      continue;
    }
    // Linear search of the groups to find a matching rotation.
    float block_angle = block->re_rotation().angle();
    int best_g = 0;
    float best_angle_diff = MAX_FLOAT32;
    for (int g = 0; g < groups.size(); ++g) {
      double angle_diff = fabs(block_angle - groups[g]->angle);
      if (angle_diff > M_PI) angle_diff = fabs(angle_diff - 2.0 * M_PI);
      if (angle_diff < best_angle_diff) {
        best_angle_diff = angle_diff;
        best_g = g;
      }
    }
    if (best_angle_diff > kMaxAngleDiff) {
      groups.push_back(new BlockGroup(block));
    } else {
      groups[best_g]->blocks.push_back(block);
      groups[best_g]->bounding_box += block->pdblk.bounding_box();
      float x_height = block->x_height();
      if (x_height < groups[best_g]->min_xheight)
        groups[best_g]->min_xheight = x_height;
    }
  }
  // Now process each group of blocks.
  PointerVector<WordWithBox> word_ptrs;
  for (int g = 0; g < groups.size(); ++g) {
    const BlockGroup* group = groups[g];
    if (group->bounding_box.null_box()) continue;
    WordGrid word_grid(group->min_xheight, group->bounding_box.botleft(),
                       group->bounding_box.topright());
    for (int b = 0; b < group->blocks.size(); ++b) {
      ROW_IT row_it(group->blocks[b]->row_list());
      for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
        ROW* row = row_it.data();
        // Put the words of the row into the grid.
        WERD_IT w_it(row->word_list());
        for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
          WERD* word = w_it.data();
          WordWithBox* box_word = new WordWithBox(word);
          word_grid.InsertBBox(true, true, box_word);
          // Save the pointer where it will be auto-deleted.
          word_ptrs.push_back(box_word);
        }
      }
    }
    FCOORD rotation = group->rotation;
    // Make it a forward rotation that will transform blob coords to block.
    rotation.set_y(-rotation.y());
    TransferDiacriticsToWords(diacritic_blobs, rotation, &word_grid);
  }
}
Пример #27
0
/// Consume all source blobs that strongly overlap the given box,
/// putting them into a new word, with the correct_text label.
/// Fights over which box owns which blobs are settled by
/// applying the blobs to box or next_box with the least non-overlap.
/// @return false if the box was in error, which can only be caused by
/// failing to find an overlapping blob for a box.
bool Tesseract::ResegmentWordBox(BLOCK_LIST *block_list,
                                 const TBOX& box, const TBOX& next_box,
                                 const char* correct_text) {
  if (applybox_debug > 1) {
    tprintf("\nAPPLY_BOX: in ResegmentWordBox() for %s\n", correct_text);
  }
  WERD* new_word = NULL;
  BLOCK_IT b_it(block_list);
  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
    BLOCK* block = b_it.data();
    if (!box.major_overlap(block->bounding_box()))
      continue;
    ROW_IT r_it(block->row_list());
    for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) {
      ROW* row = r_it.data();
      if (!box.major_overlap(row->bounding_box()))
        continue;
      WERD_IT w_it(row->word_list());
      for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
        WERD* word = w_it.data();
        if (applybox_debug > 2) {
          tprintf("Checking word:");
          word->bounding_box().print();
        }
        if (word->text() != NULL && word->text()[0] != '\0')
          continue;  // Ignore words that are already done.
        if (!box.major_overlap(word->bounding_box()))
          continue;
        C_BLOB_IT blob_it(word->cblob_list());
        for (blob_it.mark_cycle_pt(); !blob_it.cycled_list();
             blob_it.forward()) {
          C_BLOB* blob = blob_it.data();
          TBOX blob_box = blob->bounding_box();
          if (!blob_box.major_overlap(box))
            continue;
          double current_box_miss_metric = BoxMissMetric(blob_box, box);
          double next_box_miss_metric = BoxMissMetric(blob_box, next_box);
          if (applybox_debug > 2) {
            tprintf("Checking blob:");
            blob_box.print();
            tprintf("Current miss metric = %g, next = %g\n",
                    current_box_miss_metric, next_box_miss_metric);
          }
          if (current_box_miss_metric > next_box_miss_metric)
            continue;  // Blob is a better match for next box.
          if (applybox_debug > 2) {
            tprintf("Blob match: blob:");
            blob_box.print();
            tprintf("Matches box:");
            box.print();
            tprintf("With next box:");
            next_box.print();
          }
          if (new_word == NULL) {
            // Make a new word with a single blob.
            new_word = word->shallow_copy();
            new_word->set_text(correct_text);
            w_it.add_to_end(new_word);
          }
          C_BLOB_IT new_blob_it(new_word->cblob_list());
          new_blob_it.add_to_end(blob_it.extract());
        }
      }
    }
  }
  if (new_word == NULL && applybox_debug > 0) tprintf("FAIL!\n");
  return new_word != NULL;
}
Пример #28
0
// Compute the distance from the left and right ends of each row to the
// left and right edges of the block's polyblock.  Illustration:
//  ____________________________   _______________________
//  |  Howdy neighbor!         |  |rectangular blocks look|
//  |  This text is  written to|  |more like stacked pizza|
//  |illustrate how useful poly-  |boxes.                 |
//  |blobs  are   in -----------  ------   The    polyblob|
//  |dealing    with|     _________     |for a BLOCK  rec-|
//  |harder   layout|   /===========\   |ords the possibly|
//  |issues.        |    |  _    _  |   |skewed    pseudo-|
//  |  You  see this|    | |_| \|_| |   |rectangular      |
//  |text is  flowed|    |      }   |   |boundary     that|
//  |around  a  mid-|     \   ____  |   |forms the  ideal-|
//  |cloumn portrait._____ \       /  __|ized  text margin|
//  |  Polyblobs     exist| \    /   |from which we should|
//  |to account for insets|  |   |   |measure    paragraph|
//  |which make  otherwise|  -----   |indentation.        |
//  -----------------------          ----------------------
//
// If we identify a drop-cap, we measure the left margin for the lines
// below the first line relative to one space past the drop cap.  The
// first line's margin and those past the drop cap area are measured
// relative to the enclosing polyblock.
//
// TODO(rays): Before this will work well, we'll need to adjust the
//             polyblob tighter around the text near images, as in:
//             UNLV_AUTO:mag.3G0  page 2
//             UNLV_AUTO:mag.3G4  page 16
void BLOCK::compute_row_margins() {
  if (row_list()->empty() || row_list()->singleton()) {
    return;
  }

  // If Layout analysis was not called, default to this.
  POLY_BLOCK rect_block(bounding_box(), PT_FLOWING_TEXT);
  POLY_BLOCK *pblock = &rect_block;
  if (poly_block() != NULL) {
    pblock = poly_block();
  }

  // Step One: Determine if there is a drop-cap.
  //           TODO(eger): Fix up drop cap code for RTL languages.
  ROW_IT r_it(row_list());
  ROW *first_row = r_it.data();
  ROW *second_row = r_it.data_relative(1);

  // initialize the bottom of a fictitious drop cap far above the first line.
  int drop_cap_bottom = first_row->bounding_box().top() +
                        first_row->bounding_box().height();
  int drop_cap_right = first_row->bounding_box().left();
  int mid_second_line = second_row->bounding_box().top() -
                        second_row->bounding_box().height() / 2;
  WERD_IT werd_it(r_it.data()->word_list());  // words of line one
  if (!werd_it.empty()) {
    C_BLOB_IT cblob_it(werd_it.data()->cblob_list());
    for (cblob_it.mark_cycle_pt(); !cblob_it.cycled_list();
         cblob_it.forward()) {
      TBOX bbox = cblob_it.data()->bounding_box();
      if (bbox.bottom() <= mid_second_line) {
        // we found a real drop cap
        first_row->set_has_drop_cap(true);
        if (drop_cap_bottom >  bbox.bottom())
          drop_cap_bottom = bbox.bottom();
        if (drop_cap_right < bbox.right())
          drop_cap_right = bbox.right();
      }
    }
  }

  // Step Two: Calculate the margin from the text of each row to the block
  //           (or drop-cap) boundaries.
  PB_LINE_IT lines(pblock);
  r_it.set_to_list(row_list());
  for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) {
    ROW *row = r_it.data();
    TBOX row_box = row->bounding_box();
    int left_y = row->base_line(row_box.left()) + row->x_height();
    int left_margin;
    ICOORDELT_LIST *segments = lines.get_line(left_y);
    LeftMargin(segments, row_box.left(), &left_margin);
    delete segments;

    if (row_box.top() >= drop_cap_bottom) {
      int drop_cap_distance = row_box.left() - row->space() - drop_cap_right;
      if (drop_cap_distance < 0)
        drop_cap_distance = 0;
      if (drop_cap_distance < left_margin)
        left_margin = drop_cap_distance;
    }

    int right_y = row->base_line(row_box.right()) + row->x_height();
    int right_margin;
    segments = lines.get_line(right_y);
    RightMargin(segments, row_box.right(), &right_margin);
    delete segments;
    row->set_lmargin(left_margin);
    row->set_rmargin(right_margin);
  }
}
Пример #29
0
/**
    \brief Nahrazuje ldu += u*(-l*d); tak aby nedochazelo k nacitani u do noveho radku => Chaby pokus o zrychleni
    \param[in] ac - actual column
    \param[in] jj - Na kolikate pozici v radku jsem
    \param[in,out] ldu - Radek rozkladane matice, ktery se updatuje
    \warning - Zastarale - ROW nahrazeno jinou tridou
*/
void SPARSE::update_ldu_row(long ac, long jj, ROW& ldu)
{

    double d = *diagonal[ac];
    double l = ldu.values[jj] /= d;

    //PRIPRAV HONDOTY PRO PREDELANE MERGE FIELDS
    //IN_1 - PUVODNI HODNOTA RADKU ldu
    const double* v1  = ldu.values;
    const long*   i1  = ldu.index;
    const long i1_num = ldu.value_number;

    //IN_2 - HODNOTA U Z LDU MATICE
    const double*   v2      = (diagonal[ac]+1);
    long            s       = v2 - value_start[ac];//Vzdalenost zacatku v_2 od prvniho prvku v radku
    const long*     i2      = &column_start[ac][s];
    const long      i2_num  = (&value_start[ac][value_length[ac]]-v2);

    ldu.prepare_field(ldu.full_length);//orig pole bude volne k zapisu

    //OUT - VYSLEDNY RADEK ldu
    double* v_out = ldu.values;
    long*   i_out = ldu.index;

    //=========================
    //PREDELANE MERGE_FIELDS
    //=========================
    long i = 0; /// \param i - akt umisteni indexu [0]
    long j = 0; /// \param j - akt umisteni indexu [1]
    long k = 0; /// \param k - akt pocet vystupu

    // Dokud nedojdes na konec obou poli indexu...
    while ((i<i1_num && j<i2_num)){
        //======================================================
        //???Ktera hodnota z indexu na ktere se divam je vetsi???
        if(i1[i] < i2[j]){// Prvni index bude zapsan
            i_out[k] = i1[i];
            v_out[k] = v1[i];
            i++;
        }else{
            if(i2[j] < i1[i]){// Druhy index bude zapsan
                i_out[k] = i2[j];
                v_out[k] = v2[j]*(-l*d);
                j++;
            }else{// Oba indexy jsou stejne -> secist values
                i_out[k] = i1[i];
                v_out[k] = v1[i]+v2[j]*(-l*d);
                i++; j++;
            }
        }
        k++;
    }

    while (i<i1_num){
        i_out[k] = i1[i];
        v_out[k] = v1[i];
        i++; k++;
    }
    while (j<i2_num){
        i_out[k] = i2[j];
        v_out[k] = v2[j]*(-l*d);
        j++; k++;
    }
    ldu.value_number = k;
    //*************************
    //PREDELANE MERGE_FIELDS
    //*************************
}