// Adds the selected outlines to the indcated real blobs, and puts the rest // back in rej_cblobs where they came from. Where the target_blobs entry is // nullptr, a run of wanted outlines is put into a single new blob. // Ownership of the outlines is transferred back to the word. (Hence // GenericVector and not PointerVector.) // Returns true if any new blob was added to the start of the word, which // suggests that it might need joining to the word before it, and likewise // sets make_next_word_fuzzy true if any new blob was added to the end. bool WERD::AddSelectedOutlines(const GenericVector<bool>& wanted, const GenericVector<C_BLOB*>& target_blobs, const GenericVector<C_OUTLINE*>& outlines, bool* make_next_word_fuzzy) { bool outline_added_to_start = false; if (make_next_word_fuzzy != nullptr) *make_next_word_fuzzy = false; C_BLOB_IT rej_it(&rej_cblobs); for (int i = 0; i < outlines.size(); ++i) { C_OUTLINE* outline = outlines[i]; if (outline == nullptr) continue; // Already used it. if (wanted[i]) { C_BLOB* target_blob = target_blobs[i]; TBOX noise_box = outline->bounding_box(); if (target_blob == nullptr) { target_blob = new C_BLOB(outline); // Need to find the insertion point. C_BLOB_IT blob_it(&cblobs); for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { C_BLOB* blob = blob_it.data(); TBOX blob_box = blob->bounding_box(); if (blob_box.left() > noise_box.left()) { if (blob_it.at_first() && !flag(W_FUZZY_SP) && !flag(W_FUZZY_NON)) { // We might want to join this word to its predecessor. outline_added_to_start = true; } blob_it.add_before_stay_put(target_blob); break; } } if (blob_it.cycled_list()) { blob_it.add_to_end(target_blob); if (make_next_word_fuzzy != nullptr) *make_next_word_fuzzy = true; } // Add all consecutive wanted, but null-blob outlines to same blob. C_OUTLINE_IT ol_it(target_blob->out_list()); while (i + 1 < outlines.size() && wanted[i + 1] && target_blobs[i + 1] == nullptr) { ++i; ol_it.add_to_end(outlines[i]); } } else { // Insert outline into this blob. C_OUTLINE_IT ol_it(target_blob->out_list()); ol_it.add_to_end(outline); } } else { // Put back on noise list. rej_it.add_to_end(new C_BLOB(outline)); } } return outline_added_to_start; }
// Merge this with other, taking the outlines from other. // Other is not deleted, but left for the caller to handle. void BLOBNBOX::really_merge(BLOBNBOX* other) { if (cblob_ptr != NULL && other->cblob_ptr != NULL) { C_OUTLINE_IT ol_it(cblob_ptr->out_list()); ol_it.add_list_after(other->cblob_ptr->out_list()); } compute_bounding_box(); }
// Build a fake outline, given just a bounding box and append to the list. void C_OUTLINE::FakeOutline(const TBOX& box, C_OUTLINE_LIST* outlines) { C_OUTLINE_IT ol_it(outlines); // Make a C_OUTLINE from the bounds. This is a bit of a hack, // as there is no outline, just a bounding box, but it works nicely. CRACKEDGE start; start.pos = box.topleft(); C_OUTLINE* outline = new C_OUTLINE(&start, box.topleft(), box.botright(), 0); ol_it.add_to_end(outline); }
// Extracts all the noise outlines and stuffs the pointers into the given // vector of outlines. Afterwards, the outlines vector owns the pointers. void WERD::GetNoiseOutlines(GenericVector<C_OUTLINE*>* outlines) { C_BLOB_IT rej_it(&rej_cblobs); for (rej_it.mark_cycle_pt(); !rej_it.empty(); rej_it.forward()) { C_BLOB* blob = rej_it.extract(); C_OUTLINE_IT ol_it(blob->out_list()); outlines->push_back(ol_it.extract()); delete blob; } }
// Removes noise from the word by moving small outlines to the rej_cblobs // list, based on the size_threshold. void WERD::CleanNoise(float size_threshold) { C_BLOB_IT blob_it(&cblobs); C_BLOB_IT rej_it(&rej_cblobs); for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { C_BLOB* blob = blob_it.data(); C_OUTLINE_IT ol_it(blob->out_list()); for (ol_it.mark_cycle_pt(); !ol_it.cycled_list(); ol_it.forward()) { C_OUTLINE* outline = ol_it.data(); TBOX ol_box = outline->bounding_box(); int ol_size = ol_box.width() > ol_box.height() ? ol_box.width() : ol_box.height(); if (ol_size < size_threshold) { // This outline is too small. Move it to a separate blob in the // reject blobs list. C_BLOB* rej_blob = new C_BLOB(ol_it.extract()); rej_it.add_after_then_move(rej_blob); } } if (blob->out_list()->empty()) delete blob_it.extract(); } }