/// Builds a PAGE_RES from the block_list in the way required for ApplyBoxes: /// All fuzzy spaces are removed, and all the words are maximally chopped. PAGE_RES* Tesseract::SetupApplyBoxes(const GenericVector<TBOX>& boxes, BLOCK_LIST *block_list) { PreenXHeights(block_list); // Strip all fuzzy space markers to simplify the PAGE_RES. BLOCK_IT b_it(block_list); for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { BLOCK* block = b_it.data(); ROW_IT r_it(block->row_list()); for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward ()) { ROW* row = r_it.data(); WERD_IT w_it(row->word_list()); for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) { WERD* word = w_it.data(); if (word->cblob_list()->empty()) { delete w_it.extract(); } else { word->set_flag(W_FUZZY_SP, false); word->set_flag(W_FUZZY_NON, false); } } } } PAGE_RES* page_res = new PAGE_RES(false, block_list, NULL); PAGE_RES_IT pr_it(page_res); WERD_RES* word_res; while ((word_res = pr_it.word()) != NULL) { MaximallyChopWord(boxes, pr_it.block()->block, pr_it.row()->row, word_res); pr_it.forward(); } return page_res; }
/// Builds a pseudo word out of deep copies of the blobs that overlap
/// selection_box, taken from the first word (in page order) that has at
/// least one such blob. The pseudo word is inserted into page_res via
/// InsertSimpleCloneWord, and a newly allocated PAGE_RES_IT positioned on
/// the inserted word is returned. Returns NULL when no blob overlaps the box.
PAGE_RES_IT* make_pseudo_word(PAGE_RES* page_res, const TBOX& selection_box) {
  PAGE_RES_IT pr_it(page_res);
  C_BLOB_LIST gathered;               // copies of the overlapping blobs
  C_BLOB_IT gathered_it(&gathered);   // appends to 'gathered'

  for (WERD_RES* word_res = pr_it.word(); word_res != NULL;
       word_res = pr_it.forward()) {
    WERD* source_word = word_res->word;
    if (!source_word->bounding_box().overlap(selection_box))
      continue;

    // Copy every blob of this word that touches the selection.
    C_BLOB_IT src_it(source_word->cblob_list());
    for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
      C_BLOB* candidate = src_it.data();
      if (candidate->bounding_box().overlap(selection_box))
        gathered_it.add_after_then_move(C_BLOB::deep_copy(candidate));
    }
    if (gathered.empty())
      continue;

    WERD* pseudo_word = new WERD(&gathered, 1, NULL);
    word_res = pr_it.InsertSimpleCloneWord(*word_res, pseudo_word);

    // Walk a fresh iterator forward until it sits on the inserted word.
    PAGE_RES_IT* result = new PAGE_RES_IT(page_res);
    WERD_RES* at = result->word();
    while (at != word_res && at != NULL) {
      result->forward();
      at = result->word();
    }
    ASSERT_HOST(result->word() == word_res);
    return result;
  }
  return NULL;
}
void SOP_PrimGroupCentroid::baryCenter(const GU_Detail *input_geo, GA_Range &pr_range, const GA_PrimitiveList &prim_list, UT_Vector3 &pos) { GA_Range pt_range; GA_OffsetArray points; GA_OffsetArray::const_iterator points_it; // We need to iterate over each primitive in the range and // find out which points it references. for (GA_Iterator pr_it(pr_range); !pr_it.atEnd(); ++pr_it) { // Get the range of points for the primitive using the // offset from the primitive list. pt_range = prim_list.get(*pr_it)->getPointRange(); // Add each point's offset to the array, checking for duplicates. for (GA_Iterator pt_it(pt_range); !pt_it.atEnd(); ++pt_it) points.append(*pt_it, true); } // Reset the position. pos.assign(0,0,0); // Add the positions for all the points. for (points_it = points.begin(); !points_it.atEnd(); ++points_it) pos += input_geo->getPos3(*points_it); // Store the average position for all the points we found. pos /= points.entries(); }
void SOP_PrimGroupCentroid::boundingBox(const GU_Detail *input_geo, GA_Range &pr_range, const GA_PrimitiveList &prim_list, UT_Vector3 &pos) { GA_Range pt_range; UT_BoundingBox bbox; // Initialize the bounding box to contain nothing and have // no position. bbox.initBounds(); // Iterate over each primitive in the range. for (GA_Iterator pr_it(pr_range); !pr_it.atEnd(); ++pr_it) { // Get the range of points for the primitive using the // offset from the primitive list. pt_range = prim_list.get(*pr_it)->getPointRange(); // For each point in the primitive, enlarge the bounding // box to contain it. for (GA_Iterator pt_it(pt_range); !pt_it.atEnd(); ++pt_it) bbox.enlargeBounds(input_geo->getPos3(*pt_it)); } // Extract the center. pos = bbox.center(); }
/// - Counts up the labelled words and the blobs within. /// - Deletes all unused or emptied words, counting the unused ones. /// - Resets W_BOL and W_EOL flags correctly. /// - Builds the rebuild_word and rebuilds the box_word and the best_choice. void Tesseract::TidyUp(PAGE_RES* page_res) { int ok_blob_count = 0; int bad_blob_count = 0; int ok_word_count = 0; int unlabelled_words = 0; PAGE_RES_IT pr_it(page_res); WERD_RES* word_res; for (; (word_res = pr_it.word()) != NULL; pr_it.forward()) { int ok_in_word = 0; int blob_count = word_res->correct_text.size(); WERD_CHOICE* word_choice = new WERD_CHOICE(word_res->uch_set, blob_count); word_choice->set_permuter(TOP_CHOICE_PERM); for (int c = 0; c < blob_count; ++c) { if (word_res->correct_text[c].length() > 0) { ++ok_in_word; } // Since we only need a fake word_res->best_choice, the actual // unichar_ids do not matter. Which is fortunate, since TidyUp() // can be called while training Tesseract, at the stage where // unicharset is not meaningful yet. word_choice->append_unichar_id_space_allocated( INVALID_UNICHAR_ID, word_res->best_state[c], 1.0f, -1.0f); } if (ok_in_word > 0) { ok_blob_count += ok_in_word; bad_blob_count += word_res->correct_text.size() - ok_in_word; word_res->LogNewRawChoice(word_choice); word_res->LogNewCookedChoice(1, false, word_choice); } else { ++unlabelled_words; if (applybox_debug > 0) { tprintf("APPLY_BOXES: Unlabelled word at :"); word_res->word->bounding_box().print(); } pr_it.DeleteCurrentWord(); delete word_choice; } } pr_it.restart_page(); for (; (word_res = pr_it.word()) != NULL; pr_it.forward()) { // Denormalize back to a BoxWord. 
word_res->RebuildBestState(); word_res->SetupBoxWord(); word_res->word->set_flag(W_BOL, pr_it.prev_row() != pr_it.row()); word_res->word->set_flag(W_EOL, pr_it.next_row() != pr_it.row()); } if (applybox_debug > 0) { tprintf(" Found %d good blobs.\n", ok_blob_count); if (bad_blob_count > 0) { tprintf(" Leaving %d unlabelled blobs in %d words.\n", bad_blob_count, ok_word_count); } if (unlabelled_words > 0) tprintf(" %d remaining unlabelled words deleted.\n", unlabelled_words); } }
/// Calls #LearnWord to extract features for labelled blobs within each word. /// Features are stored in an internal buffer. void Tesseract::ApplyBoxTraining(const STRING& fontname, PAGE_RES* page_res) { PAGE_RES_IT pr_it(page_res); int word_count = 0; for (WERD_RES *word_res = pr_it.word(); word_res != NULL; word_res = pr_it.forward()) { LearnWord(fontname.string(), word_res); ++word_count; } tprintf("Generated training data for %d words\n", word_count); }
/**
 * do_re_display()
 *
 * Clears the image window, optionally redraws the binary image, then calls
 * word_painter for every word of current_page_res. Baselines and block
 * outlines are plotted at the first word of each new row / block when the
 * corresponding display options are set.
 */
void Tesseract::do_re_display(
    BOOL8 (tesseract::Tesseract::*word_painter)(PAGE_RES_IT* pr_it)) {
  image_win->Clear();
  if (display_image != 0) {
    image_win->Image(pix_binary_, 0, 0);
  }

  int next_block_index = 1;  // index handed to plot() for the next new block
  PAGE_RES_IT pr_it(current_page_res);
  WERD_RES* word = pr_it.word();
  while (word != NULL) {
    (this->*word_painter)(&pr_it);

    // Draw the baseline once per row, at the row's first word.
    if (display_baselines) {
      if (pr_it.row() != pr_it.prev_row())
        pr_it.row()->row->plot_baseline(image_win, ScrollView::GREEN);
    }

    // Draw the block outline once per block, at the block's first word.
    if (display_blocks) {
      if (pr_it.block() != pr_it.prev_block()) {
        pr_it.block()->block->plot(image_win, next_block_index,
                                   ScrollView::RED);
        ++next_block_index;
      }
    }

    word = pr_it.forward();
  }
  image_win->Update();
}
/**
 * Gives every WERD_RES in page_res a fake best_choice built from its
 * correct_text: any existing word choices are cleared and the new choice is
 * logged as both the raw and the cooked choice.
 */
void Tesseract::CorrectClassifyWords(PAGE_RES* page_res) {
  PAGE_RES_IT pr_it(page_res);
  WERD_RES* word_res = pr_it.word();
  while (word_res != NULL) {
    const int num_chars = word_res->correct_text.size();
    WERD_CHOICE* truth_choice = new WERD_CHOICE(word_res->uch_set, num_chars);
    for (int idx = 0; idx < num_chars; ++idx) {
      // Only the text before the first space is the actual ground truth;
      // what follows is the bounding box location and page number.
      GenericVector<STRING> parts;
      word_res->correct_text[idx].split(' ', &parts);
      UNICHAR_ID truth_id = unicharset.unichar_to_id(parts[0].string());
      truth_choice->append_unichar_id_space_allocated(
          truth_id, word_res->best_state[idx], 0.0f, 0.0f);
    }
    word_res->ClearWordChoices();
    word_res->LogNewRawChoice(truth_choice);
    word_res->LogNewCookedChoice(1, false, truth_choice);
    word_res = pr_it.forward();
  }
}
/// Returns the write cursor to use after a bounded snprintf() call.
/// 'written' is snprintf's return value; on truncation the cursor is parked
/// on the buffer's final byte so later appends become harmless no-ops.
static char* AdvanceMessageCursor(char* cursor, char* buffer_end,
                                  int written) {
  if (written < 0)
    return cursor;            // Encoding error: nothing was appended.
  if (written >= buffer_end - cursor)
    return buffer_end - 1;    // Truncated: stay on the terminating NUL.
  return cursor + written;
}

/// Builds a one-line description of the point (x, y) and adds it to
/// image_win as a message. The description contains the point itself, the
/// baseline height at x for every row whose bounding box contains the point,
/// and the bounding boxes of every word and C_BLOB that contain the point.
///
/// The text is assembled in a fixed-size buffer; output that does not fit is
/// truncated instead of being written past the end of the buffer.
void show_point(PAGE_RES* page_res, float x, float y) {
  FCOORD pt(x, y);
  PAGE_RES_IT pr_it(page_res);

  const int kBufsize = 512;
  char msg[kBufsize];
  char* const msg_end = msg + kBufsize;
  char* msg_ptr = msg;

  msg_ptr = AdvanceMessageCursor(
      msg_ptr, msg_end,
      snprintf(msg_ptr, static_cast<size_t>(msg_end - msg_ptr),
               "Pt:(%0.3f, %0.3f) ", x, y));

  for (WERD_RES* word = pr_it.word(); word != NULL; word = pr_it.forward()) {
    // Report the baseline once per row, at the row's first word.
    if (pr_it.row() != pr_it.prev_row() &&
        pr_it.row()->row->bounding_box().contains(pt)) {
      msg_ptr = AdvanceMessageCursor(
          msg_ptr, msg_end,
          snprintf(msg_ptr, static_cast<size_t>(msg_end - msg_ptr),
                   "BL(x)=%0.3f ", pr_it.row()->row->base_line(x)));
    }
    if (word->word->bounding_box().contains(pt)) {
      TBOX box = word->word->bounding_box();
      msg_ptr = AdvanceMessageCursor(
          msg_ptr, msg_end,
          snprintf(msg_ptr, static_cast<size_t>(msg_end - msg_ptr),
                   "Wd(%d, %d)/(%d, %d) ",
                   box.left(), box.bottom(), box.right(), box.top()));
      C_BLOB_IT cblob_it(word->word->cblob_list());
      for (cblob_it.mark_cycle_pt();
           !cblob_it.cycled_list();
           cblob_it.forward()) {
        C_BLOB* cblob = cblob_it.data();
        box = cblob->bounding_box();
        if (box.contains(pt)) {
          msg_ptr = AdvanceMessageCursor(
              msg_ptr, msg_end,
              snprintf(msg_ptr, static_cast<size_t>(msg_end - msg_ptr),
                       "CBlb(%d, %d)/(%d, %d) ",
                       box.left(), box.bottom(), box.right(), box.top()));
        }
      }
    }
  }
  image_win->AddMessage(msg);
}
/// Resegments the words by running the classifier in an attempt to find the /// correct segmentation that produces the required string. void Tesseract::ReSegmentByClassification(PAGE_RES* page_res) { PAGE_RES_IT pr_it(page_res); WERD_RES* word_res; for (; (word_res = pr_it.word()) != NULL; pr_it.forward()) { WERD* word = word_res->word; if (word->text() == NULL || word->text()[0] == '\0') continue; // Ignore words that have no text. // Convert the correct text to a vector of UNICHAR_ID GenericVector<UNICHAR_ID> target_text; if (!ConvertStringToUnichars(word->text(), &target_text)) { tprintf("APPLY_BOX: FAILURE: can't find class_id for '%s'\n", word->text()); pr_it.DeleteCurrentWord(); continue; } if (!FindSegmentation(target_text, word_res)) { tprintf("APPLY_BOX: FAILURE: can't find segmentation for '%s'\n", word->text()); pr_it.DeleteCurrentWord(); continue; } } }
// Transforms sets of primitives on gdp using the points of the second input.
//
// For every point of the second input, the primitives on gdp that match the
// point's identifying attribute are located, a centroid position is computed
// for them, and a transform built by buildTransform() from that position and
// the point is applied to them.
//
//   t      - The time used to evaluate the node parameters.
//   mode   - Which point attribute identifies the primitives:
//            0 = "group" (primitive group name), 1 = "name", 2 = "class".
//   method - How the centroid is computed: 1 = bounding box center,
//            2 = center of mass, anything else = barycenter.
//
// Returns 0 on success, or 1 after adding an error when the mode is invalid
// or a required attribute is missing.
int
SOP_PrimGroupCentroid::bindToCentroids(fpreal t, int mode, int method)
{
    int                         behavior;

    exint                       int_value;

    const GA_PrimitiveGroup     *group;
    GA_PrimitiveGroup           *all_prims, *temp_group;

    GA_Range                    pr_range;

    GA_ROAttributeRef           attr_gah, primattr_gah;
    GA_ROHandleI                class_h;
    GA_ROHandleS                str_h;

    const GU_Detail             *input_geo;

    UT_Matrix4                  mat;
    UT_String                   attr_name, pattern, str_value;
    UT_Vector3                  pos;

    // Get the second input geometry as read only.
    // NOTE(review): input_geo is used below without a null check - confirm
    // the caller guarantees the second input is connected.
    GU_DetailHandleAutoReadLock gdl(inputGeoHandle(1));
    input_geo = gdl.getGdp();

    // Get the unmatched geometry behavior.
    behavior = BEHAVIOR(t);

    // Create a new attribute reference map.
    GA_AttributeRefMap hmap(*gdp, input_geo);

    // Get the attribute selection string.
    BIND(pattern, t);

    // If we have a pattern, try to build the ref map.
    if (pattern.length() > 0)
        buildRefMap(hmap, pattern, gdp, input_geo, mode, GA_ATTRIB_POINT);

    // The primitive list of the geometry being modified (gdp).
    const GA_PrimitiveList &prim_list = gdp->getPrimitiveList();

    // Create a temporary primitive group so we can keep track of all the
    // primitives we have modified.
    all_prims = createAdhocPrimGroup(*gdp, "allprims");

    // Determine which attribute we need from the points, based on the mode.
    switch (mode)
    {
        case 0:
            attr_name = "group";
            break;
        case 1:
            attr_name = "name";
            break;
        case 2:
            attr_name = "class";
            break;
        default:
            addError(SOP_MESSAGE, "Invalid mode setting");
            return 1;
    }

    // Find the attribute on the points of the second input.
    attr_gah = input_geo->findPointAttribute(attr_name);

    // If there is no attribute, add an error message and quit.
    if (attr_gah.isInvalid())
    {
        addError(SOP_ATTRIBUTE_INVALID, attr_name);
        return 1;
    }

    // If not using groups, we need to check if the matching primitive
    // attribute exists on the geometry.
    if (mode != 0)
    {
        // Try to find the attribute.
        primattr_gah = gdp->findPrimitiveAttribute(attr_name);

        // If there is no attribute, add an error message and quit.
        if (primattr_gah.isInvalid())
        {
            addError(SOP_ATTRIBUTE_INVALID, attr_name);
            return 1;
        }
    }

    // 'class' uses the int handle.
    if (mode == 2)
        class_h.bind(attr_gah.getAttribute());
    // Groups and 'name' use the string handle.
    else
        str_h.bind(attr_gah.getAttribute());

    // Process each point of the second input in turn.
    for (GA_Iterator it(input_geo->getPointRange()); !it.atEnd(); ++it)
    {
        if (mode == 0)
        {
            // Get the unique string value.
            str_value = str_h.get(*it);

            // Find the group on the geometry to bind.
            group = gdp->findPrimitiveGroup(str_value);

            // Ignore non-existent groups.
            if (!group)
                continue;

            // Skip empty groups.
            if (group->isEmpty())
                continue;

            // The primitives in the group.
            pr_range = gdp->getPrimitiveRange(group);
        }
        else
        {
            if (mode == 1)
            {
                // Get the unique string value.
                str_value = str_h.get(*it);
                // Get the prims with that string value.
                pr_range = gdp->getRangeByValue(primattr_gah, str_value);
            }
            else
            {
                // Get the unique integer value.
                int_value = class_h.get(*it);
                // Get the prims with that integer value.
                pr_range = gdp->getRangeByValue(primattr_gah, int_value);
            }

            // Create an adhoc group holding the matched primitives so they
            // can be transformed together below.
            // NOTE(review): unlike the group mode above, nothing here skips
            // a value that matches no primitives, and a new adhoc group is
            // created for every point - confirm both are intended.
            temp_group = createAdhocPrimGroup(*gdp);
            temp_group->addRange(pr_range);
        }

        // Add the primitives in the range to the group of processed prims.
        all_prims->addRange(pr_range);

        // Bounding Box
        if (method == 1)
        {
            // Calculate the bounding box center for this range.
            boundingBox(gdp, pr_range, prim_list, pos);
        }
        // Center of Mass
        else if (method == 2)
        {
            // Calculate the center of mass for this attribute value.
            centerOfMass(pr_range, prim_list, pos);
        }
        // Barycenter
        else
        {
            // Calculate the barycenter for this attribute value.
            baryCenter(gdp, pr_range, prim_list, pos);
        }

        // Build the transform from the point information.
        buildTransform(mat, input_geo, pos, *it);

        // Transform the geometry from the centroid.
        if (mode == 0)
            gdp->transform(mat, group);
        else
            gdp->transform(mat, temp_group);

        // Copy any necessary attributes from the incoming points to the
        // geometry.
        if (hmap.entries())
        {
            for (GA_Iterator pr_it(pr_range); !pr_it.atEnd(); ++pr_it)
            {
                hmap.copyValue(GA_ATTRIB_PRIMITIVE,
                               *pr_it,
                               GA_ATTRIB_POINT,
                               *it);
            }
        }
    }

    // We want to destroy prims that didn't have a matching name/group.
    if (behavior)
    {
        // Flip the membership of all the prims that we did see, so the
        // group now holds the ones that were never matched.
        all_prims->toggleEntries();

        // Destroy the ones that we didn't.
        gdp->deletePrimitives(*all_prims, true);
    }

    return 0;
}
// Cooks the rain SOP. Unless the node is already in an abort-level error
// state, this builds a RainData object from the node parameters evaluated at
// the cook time, generates the points on the first cook, and then shifts the
// points of every primitive on gdp. The parameter-changed and
// point-count-changed flags are cleared on every cook, and the node's error
// state is returned.
//
// Interrupt handling (UT_Interrupt opStart/opEnd) and unlockInputs() are
// currently not used by this cook.
OP_ERROR
SOP_Rain::cookMySop(OP_Context &context)
{
    if (error() < UT_ERROR_ABORT)
    {
        fpreal cook_time = TIME(context.getTime());
        long point_count = NPOINTS(cook_time);
        UT_Vector3 direction = RAINDIRECTION(cook_time);
        // TODO: the direction is not normalized here; normalizing would
        // need a check for a (0,0,0) vector first.

        RainData rain(cook_time,
                      point_count,
                      BOUNDMIN(cook_time),
                      BOUNDMAX(cook_time),
                      direction,
                      DICEMIN(cook_time),
                      DICEMAX(cook_time),
                      SEED(cook_time),
                      SPEED(cook_time),
                      SPEEDVARIENCE(cook_time));

        // (Re)allocate when nothing is allocated yet or the count changed.
        if (rain.getAllocationState() == false
            || isPointsNumberChanged_ == true)
        {
            rain.allocate(point_count);
        }

        // With storage in place, compute the initial positions when they
        // are not cached yet or a parameter changed.
        if (rain.getAllocationState() == true)
        {
            if (rain.getCachedState() == false || isParameterChanged_ == true)
            {
                rain.computeInitialPositions();
                rain.setCachedState(true);
            }
        }

        // The geometry itself is only generated once.
        if (isPointsGenerated_ == false)
        {
            printf("Generate Points procedure\n");
            gdp->clearAndDestroy();
            generatePoints(gdp, point_count);
            isPointsGenerated_ = true;
        }

        // Shift the points of every primitive.
        GA_Iterator prim_it(gdp->getPrimitiveRange());
        while (!prim_it.atEnd())
        {
            GEO_Primitive* primitive = gdp->getGEOPrimitive(*prim_it);
            GA_Range prim_points = primitive->getPointRange();
            rain.shiftPositions(gdp, prim_points);
            ++prim_it;
        }
    }

    isParameterChanged_ = false;
    isPointsNumberChanged_ = false;

    return error();
}