Esempio n. 1
0
// Make a block using lines parallel to the given vector that fit between
// the min and max coordinates specified by the ColPartitions.
// Construct a block from the given list of partitions.
void WorkingPartSet::MakeBlocks(const ICOORD& bleft, const ICOORD& tright,
                                int resolution, ColPartition_LIST* used_parts) {
  part_it_.move_to_first();
  while (!part_it_.empty()) {
    // Gather a list of ColPartitions in block_parts that will be split
    // by linespacing into smaller blocks.
    ColPartition_LIST block_parts;
    ColPartition_IT block_it(&block_parts);
    ColPartition* next_part = NULL;
    bool text_block = false;
    do {
      ColPartition* part = part_it_.extract();
      if (part->blob_type() == BRT_UNKNOWN || part->blob_type() == BRT_TEXT)
        text_block = true;
      part->set_working_set(NULL);
      part_it_.forward();
      block_it.add_after_then_move(part);
      next_part = part->SingletonPartner(false);
      if (part_it_.empty() || next_part != part_it_.data()) {
        // Sequences of partitions can get split by titles.
        next_part = NULL;
      }
      // Merge adjacent blocks that are of the same type and let the
      // linespacing determine the real boundaries.
      if (next_part == NULL && !part_it_.empty()) {
        ColPartition* next_block_part = part_it_.data();
        const TBOX& part_box = part->bounding_box();
        const TBOX& next_box = next_block_part->bounding_box();

        // In addition to the same type, the next box must not be above the
        // current box, nor (if image) too far below.
        PolyBlockType type = part->type(), next_type = next_block_part->type();
        if (ColPartition::TypesSimilar(type, next_type) &&
            next_box.bottom() <= part_box.top() &&
            (text_block ||
             part_box.bottom() - next_box.top() < part_box.height()))
          next_part = next_block_part;
      }
    } while (!part_it_.empty() && next_part != NULL);
    if (!text_block) {
      TO_BLOCK* to_block = ColPartition::MakeBlock(bleft, tright,
                                                   &block_parts, used_parts);
      if (to_block != NULL) {
        TO_BLOCK_IT to_block_it(&to_blocks_);
        to_block_it.add_to_end(to_block);
        BLOCK_IT block_it(&completed_blocks_);
        block_it.add_to_end(to_block->block);
      }
    } else {
      // Further sub-divide text blocks where linespacing changes.
      ColPartition::LineSpacingBlocks(bleft, tright, resolution, &block_parts,
                                      used_parts,
                                      &completed_blocks_, &to_blocks_);
    }
  }
  part_it_.set_to_list(&part_set_);
  latest_part_ = NULL;
  ASSERT_HOST(completed_blocks_.length() == to_blocks_.length());
}
Esempio n. 2
0
// Adds the coverage, column count and box for a single partition,
// without adding it to the list. (Helper factored from ComputeCoverage.)
void ColPartitionSet::AddPartitionCoverageAndBox(const ColPartition& part) {
  bounding_box_ += part.bounding_box();
  int coverage = part.ColumnWidth();
  if (part.good_width()) {
    good_coverage_ += coverage;
    good_column_count_ += 2;
  } else {
    if (part.blob_type() < BRT_UNKNOWN)
      coverage /= 2;
    if (part.good_column())
      ++good_column_count_;
    bad_coverage_ += coverage;
  }
}
Esempio n. 3
0
// Return a copy of this. If good_only will only copy the Good ColPartitions.
ColPartitionSet* ColPartitionSet::Copy(bool good_only) {
  ColPartition_LIST copy_parts;
  ColPartition_IT src_it(&parts_);
  ColPartition_IT dest_it(&copy_parts);
  for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
    ColPartition* part = src_it.data();
    if (BLOBNBOX::IsTextType(part->blob_type()) &&
        (!good_only || part->good_width() || part->good_column()))
      dest_it.add_after_then_move(part->ShallowCopy());
  }
  if (dest_it.empty())
    return NULL;
  return new ColPartitionSet(&copy_parts);
}
Esempio n. 4
0
// Return true if this ColPartitionSet makes a legal column candidate by
// having legal individual partitions and non-overlapping adjacent pairs.
bool ColPartitionSet::LegalColumnCandidate() {
  ColPartition_IT it(&parts_);
  if (it.empty())
    return false;
  bool any_text_parts = false;
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    ColPartition* part = it.data();
    if (BLOBNBOX::IsTextType(part->blob_type())) {
      if (!part->IsLegal())
        return false;  // Individual partition is illegal.
      any_text_parts = true;
    }
    if (!it.at_last()) {
      ColPartition* next_part = it.data_relative(1);
      if (next_part->left_key() < part->right_key()) {
        return false;
      }
    }
  }
  return any_text_parts;
}
Esempio n. 5
0
// Returns the total width of all blobs in the part_set that do not lie
// within an approved column. Used as a cost measure for using this
// column set over another that might be compatible.
int ColPartitionSet::UnmatchedWidth(ColPartitionSet* part_set) {
  int total_width = 0;
  ColPartition_IT it(&part_set->parts_);
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    ColPartition* part = it.data();
    if (!BLOBNBOX::IsTextType(part->blob_type())) {
      continue;  // Non-text partitions are irrelevant to column compatibility.
    }
    int y = part->MidY();
    BLOBNBOX_C_IT box_it(part->boxes());
    for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) {
      const TBOX& box = it.data()->bounding_box();
      // Assume that the whole blob is outside any column iff its x-middle
      // is outside.
      int x = (box.left() + box.right()) / 2;
      ColPartition* col = ColumnContaining(x, y);
      if (col == NULL)
        total_width += box.width();
    }
  }
  return total_width;
}
Esempio n. 6
0
// Return true if the partitions in other are all compatible with the columns
// in this.
bool ColPartitionSet::CompatibleColumns(bool debug, ColPartitionSet* other,
                                        WidthCallback* cb) {
  if (debug) {
    tprintf("CompatibleColumns testing compatibility\n");
    Print();
    other->Print();
  }
  if (other->parts_.empty()) {
    if (debug)
      tprintf("CompatibleColumns true due to empty other\n");
    return true;
  }
  ColPartition_IT it(&other->parts_);
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    ColPartition* part = it.data();
    if (part->blob_type() < BRT_UNKNOWN) {
      if (debug) {
        tprintf("CompatibleColumns ignoring image partition\n");
        part->Print();
      }
      continue;  // Image partitions are irrelevant to column compatibility.
    }
    int y = part->MidY();
    int left = part->bounding_box().left();
    int right = part->bounding_box().right();
    ColPartition* left_col = ColumnContaining(left, y);
    ColPartition* right_col = ColumnContaining(right, y);
    if (right_col == NULL || left_col == NULL) {
      if (debug) {
        tprintf("CompatibleColumns false due to partition edge outside\n");
        part->Print();
      }
      return false;  // A partition edge lies outside of all columns
    }
    if (right_col != left_col && cb->Run(right - left)) {
      if (debug) {
        tprintf("CompatibleColumns false due to good width in multiple cols\n");
        part->Print();
      }
      return false;  // Partition with a good width must be in a single column.
    }

    ColPartition_IT it2= it;
    while (!it2.at_last()) {
      it2.forward();
      ColPartition* next_part = it2.data();
      if (!BLOBNBOX::IsTextType(next_part->blob_type()))
        continue;  // Non-text partitions are irrelevant.
      int next_left = next_part->bounding_box().left();
      if (next_left == right) {
        break;  // They share the same edge, so one must be a pull-out.
      }
      // Search to see if right and next_left fall within a single column.
      ColPartition* next_left_col = ColumnContaining(next_left, y);
      if (right_col == next_left_col) {
        // There is a column break in this column.
        // This can be due to a figure caption within a column, a pull-out
        // block, or a simple broken textline that remains to be merged:
        // all allowed, or a change in column layout: not allowed.
        // If both partitions are of good width, then it is likely
        // a change in column layout, otherwise probably an allowed situation.
        if (part->good_width() && next_part->good_width()) {
          if (debug) {
            int next_right = next_part->bounding_box().right();
            tprintf("CompatibleColumns false due to 2 parts of good width\n");
            tprintf("part1 %d-%d, part2 %d-%d\n",
                    left, right, next_left, next_right);
            right_col->Print();
          }
          return false;
        }
      }
      break;
    }
  }
  if (debug)
    tprintf("CompatibleColumns true!\n");
  return true;
}