// Evaluates the textlineiness of a ColPartition. Uses EvaluateBox below, // but uses the median top/bottom for horizontal and median left/right for // vertical instead of the bounding box edges. // Evaluates for both horizontal and vertical and returns the best result, // with a positive value for horizontal and a negative value for vertical. int TextlineProjection::EvaluateColPartition(const ColPartition& part, const DENORM* denorm, bool debug) const { if (part.IsSingleton()) return EvaluateBox(part.bounding_box(), denorm, debug); // Test vertical orientation. TBOX box = part.bounding_box(); // Use the partition median for left/right. box.set_left(part.median_left()); box.set_right(part.median_right()); int vresult = EvaluateBox(box, denorm, debug); // Test horizontal orientation. box = part.bounding_box(); // Use the partition median for top/bottom. box.set_top(part.median_top()); box.set_bottom(part.median_bottom()); int hresult = EvaluateBox(box, denorm, debug); if (debug) { tprintf("Partition hresult=%d, vresult=%d from:", hresult, vresult); part.bounding_box().print(); part.Print(); } return hresult >= -vresult ? hresult : vresult; }
// Make a block using lines parallel to the given vector that fit between // the min and max coordinates specified by the ColPartitions. // Construct a block from the given list of partitions. void WorkingPartSet::MakeBlocks(const ICOORD& bleft, const ICOORD& tright, int resolution, ColPartition_LIST* used_parts) { part_it_.move_to_first(); while (!part_it_.empty()) { // Gather a list of ColPartitions in block_parts that will be split // by linespacing into smaller blocks. ColPartition_LIST block_parts; ColPartition_IT block_it(&block_parts); ColPartition* next_part = NULL; bool text_block = false; do { ColPartition* part = part_it_.extract(); if (part->blob_type() == BRT_UNKNOWN || part->blob_type() == BRT_TEXT) text_block = true; part->set_working_set(NULL); part_it_.forward(); block_it.add_after_then_move(part); next_part = part->SingletonPartner(false); if (part_it_.empty() || next_part != part_it_.data()) { // Sequences of partitions can get split by titles. next_part = NULL; } // Merge adjacent blocks that are of the same type and let the // linespacing determine the real boundaries. if (next_part == NULL && !part_it_.empty()) { ColPartition* next_block_part = part_it_.data(); const TBOX& part_box = part->bounding_box(); const TBOX& next_box = next_block_part->bounding_box(); // In addition to the same type, the next box must not be above the // current box, nor (if image) too far below. 
PolyBlockType type = part->type(), next_type = next_block_part->type(); if (ColPartition::TypesSimilar(type, next_type) && next_box.bottom() <= part_box.top() && (text_block || part_box.bottom() - next_box.top() < part_box.height())) next_part = next_block_part; } } while (!part_it_.empty() && next_part != NULL); if (!text_block) { TO_BLOCK* to_block = ColPartition::MakeBlock(bleft, tright, &block_parts, used_parts); if (to_block != NULL) { TO_BLOCK_IT to_block_it(&to_blocks_); to_block_it.add_to_end(to_block); BLOCK_IT block_it(&completed_blocks_); block_it.add_to_end(to_block->block); } } else { // Further sub-divide text blocks where linespacing changes. ColPartition::LineSpacingBlocks(bleft, tright, resolution, &block_parts, used_parts, &completed_blocks_, &to_blocks_); } } part_it_.set_to_list(&part_set_); latest_part_ = NULL; ASSERT_HOST(completed_blocks_.length() == to_blocks_.length()); }
// Adds the coverage, column count and box for a single partition, // without adding it to the list. (Helper factored from ComputeCoverage.) void ColPartitionSet::AddPartitionCoverageAndBox(const ColPartition& part) { bounding_box_ += part.bounding_box(); int coverage = part.ColumnWidth(); if (part.good_width()) { good_coverage_ += coverage; good_column_count_ += 2; } else { if (part.blob_type() < BRT_UNKNOWN) coverage /= 2; if (part.good_column()) ++good_column_count_; bad_coverage_ += coverage; } }
// Compute the distance of the box from the partition using curved projection // space. As DistanceOfBoxFromBox, except that the direction is taken from // the ColPartition and the median bounds of the ColPartition are used as // the to_box. int TextlineProjection::DistanceOfBoxFromPartition(const TBOX& box, const ColPartition& part, const DENORM* denorm, bool debug) const { // Compute a partition box that uses the median top/bottom of the blobs // within and median left/right for vertical. TBOX part_box = part.bounding_box(); if (part.IsHorizontalType()) { part_box.set_top(part.median_top()); part_box.set_bottom(part.median_bottom()); } else { part_box.set_left(part.median_left()); part_box.set_right(part.median_right()); } // Now use DistanceOfBoxFromBox to make the actual calculation. return DistanceOfBoxFromBox(box, part_box, part.IsHorizontalType(), denorm, debug); }
// Runs a full search over cpgrid and returns the first ColPartition whose
// box intersects t, with both boxes first converted to image coordinates
// relative to img. Returns NULL if no partition intersects.
//
// For a partition with boxes_count() > 0 the partition box comes from
// getColPartImCoords(); otherwise the partition's bounding_box() is converted
// with tessTBoxToImBox().
//
// The BOX objects produced for each comparison are released on every path,
// including the early return when a match is found.
ColPartition* M_Utils::getTBoxColPart(ColPartitionGrid* cpgrid, TBOX t,
                                      PIX* img) {
  ColPartitionGridSearch colsearch(cpgrid);
  colsearch.StartFullSearch();
  ColPartition* curpart = NULL;
  while ((curpart = colsearch.NextFullSearch()) != NULL) {
    BOX* partbox;
    if (curpart->boxes_count() > 0) {
      partbox = getColPartImCoords(curpart, img);
    } else {
      TBOX b = curpart->bounding_box();
      partbox = tessTBoxToImBox(&b, img);
    }
    // A fresh copy of t is converted on every iteration, as the converter
    // takes a non-const pointer.
    TBOX rtbox = t;
    BOX* box = tessTBoxToImBox(&rtbox, img);

    // Start at 0 so that a failed boxIntersects() call (e.g. a NULL box)
    // reads as "no intersection" instead of leaving the flag indeterminate.
    int intersects = 0;
    boxIntersects(partbox, box, &intersects);

    // Release both temporaries before acting on the result so the matching
    // path does not leak them.
    boxDestroy(&partbox);
    boxDestroy(&box);

    if (intersects) {
      return curpart;
    }
  }
  return NULL;
}
// Return true if the partitions in other are all compatible with the columns // in this. bool ColPartitionSet::CompatibleColumns(bool debug, ColPartitionSet* other, WidthCallback* cb) { if (debug) { tprintf("CompatibleColumns testing compatibility\n"); Print(); other->Print(); } if (other->parts_.empty()) { if (debug) tprintf("CompatibleColumns true due to empty other\n"); return true; } ColPartition_IT it(&other->parts_); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { ColPartition* part = it.data(); if (part->blob_type() < BRT_UNKNOWN) { if (debug) { tprintf("CompatibleColumns ignoring image partition\n"); part->Print(); } continue; // Image partitions are irrelevant to column compatibility. } int y = part->MidY(); int left = part->bounding_box().left(); int right = part->bounding_box().right(); ColPartition* left_col = ColumnContaining(left, y); ColPartition* right_col = ColumnContaining(right, y); if (right_col == NULL || left_col == NULL) { if (debug) { tprintf("CompatibleColumns false due to partition edge outside\n"); part->Print(); } return false; // A partition edge lies outside of all columns } if (right_col != left_col && cb->Run(right - left)) { if (debug) { tprintf("CompatibleColumns false due to good width in multiple cols\n"); part->Print(); } return false; // Partition with a good width must be in a single column. } ColPartition_IT it2= it; while (!it2.at_last()) { it2.forward(); ColPartition* next_part = it2.data(); if (!BLOBNBOX::IsTextType(next_part->blob_type())) continue; // Non-text partitions are irrelevant. int next_left = next_part->bounding_box().left(); if (next_left == right) { break; // They share the same edge, so one must be a pull-out. } // Search to see if right and next_left fall within a single column. ColPartition* next_left_col = ColumnContaining(next_left, y); if (right_col == next_left_col) { // There is a column break in this column. 
// This can be due to a figure caption within a column, a pull-out // block, or a simple broken textline that remains to be merged: // all allowed, or a change in column layout: not allowed. // If both partitions are of good width, then it is likely // a change in column layout, otherwise probably an allowed situation. if (part->good_width() && next_part->good_width()) { if (debug) { int next_right = next_part->bounding_box().right(); tprintf("CompatibleColumns false due to 2 parts of good width\n"); tprintf("part1 %d-%d, part2 %d-%d\n", left, right, next_left, next_right); right_col->Print(); } return false; } } break; } } if (debug) tprintf("CompatibleColumns true!\n"); return true; }