// Returns the direction of the fitted line as a unit vector, using the // least mean squared perpendicular distance. The line runs through the // mean_point, i.e. a point p on the line is given by: // p = mean_point() + lambda * vector_fit() for some real number lambda. // Note that the result (0<=x<=1, -1<=y<=-1) is directionally ambiguous // and may be negated without changing its meaning. FCOORD LLSQ::vector_fit() const { double x_var = x_variance(); double y_var = y_variance(); double covar = covariance(); FCOORD result; if (x_var >= y_var) { if (x_var == 0.0) return FCOORD(0.0f, 0.0f); result.set_x(x_var / sqrt(x_var * x_var + covar * covar)); result.set_y(sqrt(1.0 - result.x() * result.x())); } else { result.set_y(y_var / sqrt(y_var * y_var + covar * covar)); result.set_x(sqrt(1.0 - result.y() * result.y())); } if (covar < 0.0) result.set_y(-result.y()); return result; }
// Rotates every vertex of the polygon by the given rotation vector.
// Each vertex is converted to floating point, rotated, and written back
// rounded to the nearest integer via floor(v + 0.5). Afterwards compute_bb()
// is called (presumably to refresh the cached bounding box for the moved
// vertices).
// The do/while reads the first vertex before testing at_first(), so the
// vertex list is expected to be non-empty.
void POLY_BLOCK::rotate(FCOORD rotation) {
  ICOORDELT_IT vertex_it = &vertices;  // walks the vertex list once around
  do {
    ICOORDELT *vertex = vertex_it.data();

    // Work in floating point so the rotation itself is not quantised.
    FCOORD rotated;
    rotated.set_x(vertex->x());
    rotated.set_y(vertex->y());
    rotated.rotate(rotation);

    // Round back onto the integer grid.
    const inT16 new_x = static_cast<inT16>(floor(rotated.x() + 0.5));
    const inT16 new_y = static_cast<inT16>(floor(rotated.y() + 0.5));
    vertex->set_x(new_x);
    vertex->set_y(new_y);

    vertex_it.forward();
  } while (!vertex_it.at_first());
  compute_bb();
}
// Groups blocks by rotation, then, for each group, makes a WordGrid and calls // TransferDiacriticsToWords to copy the diacritic blobs to the most // appropriate words in the group of blocks. Source blobs are not touched. void Textord::TransferDiacriticsToBlockGroups(BLOBNBOX_LIST* diacritic_blobs, BLOCK_LIST* blocks) { // Angle difference larger than this is too much to consider equal. // They should only be in multiples of M_PI/2 anyway. const double kMaxAngleDiff = 0.01; // About 0.6 degrees. PointerVector<BlockGroup> groups; BLOCK_IT bk_it(blocks); for (bk_it.mark_cycle_pt(); !bk_it.cycled_list(); bk_it.forward()) { BLOCK* block = bk_it.data(); if (block->pdblk.poly_block() != nullptr && !block->pdblk.poly_block()->IsText()) { continue; } // Linear search of the groups to find a matching rotation. float block_angle = block->re_rotation().angle(); int best_g = 0; float best_angle_diff = MAX_FLOAT32; for (int g = 0; g < groups.size(); ++g) { double angle_diff = fabs(block_angle - groups[g]->angle); if (angle_diff > M_PI) angle_diff = fabs(angle_diff - 2.0 * M_PI); if (angle_diff < best_angle_diff) { best_angle_diff = angle_diff; best_g = g; } } if (best_angle_diff > kMaxAngleDiff) { groups.push_back(new BlockGroup(block)); } else { groups[best_g]->blocks.push_back(block); groups[best_g]->bounding_box += block->pdblk.bounding_box(); float x_height = block->x_height(); if (x_height < groups[best_g]->min_xheight) groups[best_g]->min_xheight = x_height; } } // Now process each group of blocks. 
PointerVector<WordWithBox> word_ptrs; for (int g = 0; g < groups.size(); ++g) { const BlockGroup* group = groups[g]; if (group->bounding_box.null_box()) continue; WordGrid word_grid(group->min_xheight, group->bounding_box.botleft(), group->bounding_box.topright()); for (int b = 0; b < group->blocks.size(); ++b) { ROW_IT row_it(group->blocks[b]->row_list()); for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { ROW* row = row_it.data(); // Put the words of the row into the grid. WERD_IT w_it(row->word_list()); for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) { WERD* word = w_it.data(); WordWithBox* box_word = new WordWithBox(word); word_grid.InsertBBox(true, true, box_word); // Save the pointer where it will be auto-deleted. word_ptrs.push_back(box_word); } } } FCOORD rotation = group->rotation; // Make it a forward rotation that will transform blob coords to block. rotation.set_y(-rotation.y()); TransferDiacriticsToWords(diacritic_blobs, rotation, &word_grid); } }