예제 #1
/// Builds a PAGE_RES from the block_list in the way required for ApplyBoxes:
/// All fuzzy spaces are removed, and all the words are maximally chopped.
///
/// @param boxes       Boxes forwarded unchanged to MaximallyChopWord.
/// @param block_list  Block list that is cleaned up in place and then wrapped
///                    in the returned PAGE_RES.
/// @return A PAGE_RES allocated with new (presumably owned by the caller --
///         confirm against the call sites).
///
/// NOTE(review): this excerpt is missing the closing braces of its loops and
/// of the function body; restore them from the full source before building.
PAGE_RES* Tesseract::SetupApplyBoxes(const GenericVector<TBOX>& boxes,
                                     BLOCK_LIST *block_list) {
  // Strip all fuzzy space markers to simplify the PAGE_RES.
  // Walks every word of every row of every block.
  BLOCK_IT b_it(block_list);
  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
    BLOCK* block = b_it.data();
    ROW_IT r_it(block->row_list());
    for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward ()) {
      ROW* row = r_it.data();
      WERD_IT w_it(row->word_list());
      for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
        WERD* word = w_it.data();
        // A word with no blobs is removed from the row's list and freed.
        if (word->cblob_list()->empty()) {
          delete w_it.extract();
        } else {
          // Every word that is kept has both fuzzy-space flags cleared.
          word->set_flag(W_FUZZY_SP, false);
          word->set_flag(W_FUZZY_NON, false);
  // Wrap the cleaned block list in a new PAGE_RES and chop each of its words.
  PAGE_RES* page_res = new PAGE_RES(false, block_list, NULL);
  PAGE_RES_IT pr_it(page_res);
  WERD_RES* word_res;
  // NOTE(review): no pr_it.forward() call is visible in this excerpt; confirm
  // that the full source advances the iterator inside this loop.
  while ((word_res = pr_it.word()) != NULL) {
    MaximallyChopWord(boxes, pr_it.block()->block,
                      pr_it.row()->row, word_res);
  return page_res;
예제 #2
/// Builds a pseudo word from blobs gathered out of the words that overlap
/// selection_box and inserts it into page_res.
///
/// @param page_res       Page results that are searched and modified.
/// @param selection_box  Box tested against word and blob bounding boxes.
/// @return A PAGE_RES_IT allocated with new and positioned at the inserted
///         word, or NULL when the visible code reaches the end without
///         inserting one.
///
/// NOTE(review): this excerpt has lost several closing braces and the body of
/// the blob-overlap test; restore them from the full source before building.
PAGE_RES_IT* make_pseudo_word(PAGE_RES* page_res, const TBOX& selection_box) {
  PAGE_RES_IT pr_it(page_res);
  C_BLOB_LIST new_blobs;               // list of gathered blobs
  C_BLOB_IT new_blob_it = &new_blobs;  // iterator

  for (WERD_RES* word_res = pr_it.word(); word_res != NULL;
       word_res = pr_it.forward()) {
    WERD* word = word_res->word;
    // Only words whose bounding box overlaps the selection are examined.
    if (word->bounding_box().overlap(selection_box)) {
      C_BLOB_IT blob_it(word->cblob_list());
      for (blob_it.mark_cycle_pt();
           !blob_it.cycled_list(); blob_it.forward()) {
        C_BLOB* blob = blob_it.data();
        // NOTE(review): the statement executed for an overlapping blob is
        // missing here; presumably it adds the blob to new_blobs -- confirm.
        if (blob->bounding_box().overlap(selection_box)) {
      if (!new_blobs.empty()) {
        // Wrap the gathered blobs in a new WERD and insert a clone-based
        // WERD_RES for it through the page iterator.
        WERD* pseudo_word = new WERD(&new_blobs, 1, NULL);
        word_res = pr_it.InsertSimpleCloneWord(*word_res, pseudo_word);
        // Scan a fresh iterator from the start of the page until it reaches
        // the inserted word; the assert fails if the word is never found.
        PAGE_RES_IT* it = new PAGE_RES_IT(page_res);
        while (it->word() != word_res && it->word() != NULL) it->forward();
        ASSERT_HOST(it->word() == word_res);
        return it;
  return NULL;
// Computes the average position of the distinct points referenced by the
// primitives in pr_range and stores it in pos.
//
//   input_geo  Geometry the point positions are read from.
//   pr_range   Range of primitives to process.
//   prim_list  Primitive list used to look up each primitive by offset.
//   pos        Output; receives the averaged position.
//
// NOTE(review): the return-type line, the braces and the statement that
// resets pos are missing from this excerpt; restore them from the full source.
SOP_PrimGroupCentroid::baryCenter(const GU_Detail *input_geo,
                                  GA_Range &pr_range,
                                  const GA_PrimitiveList &prim_list,
                                  UT_Vector3 &pos)
    GA_Range                    pt_range;

    GA_OffsetArray              points;
    GA_OffsetArray::const_iterator points_it;

    // We need to iterate over each primitive in the range and
    // find out which points it references.
    for (GA_Iterator pr_it(pr_range); !pr_it.atEnd(); ++pr_it)
        // Get the range of points for the primitive using the
        // offset from the primitive list.
        pt_range = prim_list.get(*pr_it)->getPointRange();

        // Add each point's offset to the array, checking for duplicates.
        for (GA_Iterator pt_it(pt_range); !pt_it.atEnd(); ++pt_it)
            points.append(*pt_it, true);

    // Reset the position.
    // NOTE(review): the reset statement itself is absent from this excerpt.

    // Add the positions for all the points.
    for (points_it = points.begin(); !points_it.atEnd(); ++points_it)
        pos += input_geo->getPos3(*points_it);

    // Store the average position for all the points we found.
    // NOTE(review): if no points were gathered this divides by
    // points.entries() == 0; confirm callers never pass an empty range.
    pos /= points.entries();
// Stores in pos the center of a bounding box built over the points of the
// primitives in pr_range.
//
//   input_geo  Input geometry (not referenced by the lines visible here).
//   pr_range   Range of primitives to process.
//   prim_list  Primitive list used to look up each primitive by offset.
//   pos        Output; receives bbox.center().
//
// NOTE(review): the return-type line, the braces, the bounding box
// initialisation and the statement that enlarges the box are all missing from
// this excerpt; restore them from the full source.
SOP_PrimGroupCentroid::boundingBox(const GU_Detail *input_geo,
                                   GA_Range &pr_range,
                                   const GA_PrimitiveList &prim_list,
                                   UT_Vector3 &pos)
    GA_Range                    pt_range;

    UT_BoundingBox              bbox;

    // Initialize the bounding box to contain nothing and have
    // no position.
    // NOTE(review): the initialisation statement is absent from this excerpt.

    // Iterate over each primitive in the range.
    for (GA_Iterator pr_it(pr_range); !pr_it.atEnd(); ++pr_it)
        // Get the range of points for the primitive using the
        // offset from the primitive list.
        pt_range = prim_list.get(*pr_it)->getPointRange();

        // For each point in the primitive, enlarge the bounding
        // box to contain it.
        // NOTE(review): the loop body that enlarges bbox is absent here.
        for (GA_Iterator pt_it(pt_range); !pt_it.atEnd(); ++pt_it)

    // Extract the center.
    pos = bbox.center();
예제 #5
/// Post-processes the words of page_res:
/// - Counts up the labelled words and the blobs within.
/// - Deletes all unused or emptied words, counting the unused ones.
/// - Resets W_BOL and W_EOL flags correctly.
/// - Builds the rebuild_word and rebuilds the box_word and the best_choice.
///
/// @param page_res  Page results whose words are examined and updated.
///
/// NOTE(review): this excerpt has lost many lines (closing braces, the
/// statements that update ok_in_word, ok_word_count and unlabelled_words, the
/// call that takes the arguments on the INVALID_UNICHAR_ID line, and the word
/// deletion); restore them from the full source before building.
void Tesseract::TidyUp(PAGE_RES* page_res) {
  // Running totals reported in the debug output at the end.
  int ok_blob_count = 0;
  int bad_blob_count = 0;
  int ok_word_count = 0;
  int unlabelled_words = 0;
  PAGE_RES_IT pr_it(page_res);
  WERD_RES* word_res;
  // First pass: build a choice for each word from its labelled blobs.
  for (; (word_res = pr_it.word()) != NULL; pr_it.forward()) {
    int ok_in_word = 0;
    int blob_count = word_res->correct_text.size();
    WERD_CHOICE* word_choice = new WERD_CHOICE(word_res->uch_set, blob_count);
    for (int c = 0; c < blob_count; ++c) {
      // A non-empty correct_text entry marks a labelled blob.
      if (word_res->correct_text[c].length() > 0) {
      // Since we only need a fake word_res->best_choice, the actual
      // unichar_ids do not matter. Which is fortunate, since TidyUp()
      // can be called while training Tesseract, at the stage where
      // unicharset is not meaningful yet.
          INVALID_UNICHAR_ID, word_res->best_state[c], 1.0f, -1.0f);
    if (ok_in_word > 0) {
      // The word has labelled blobs: update the totals and log the choice on
      // the word.
      ok_blob_count += ok_in_word;
      bad_blob_count += word_res->correct_text.size() - ok_in_word;
      word_res->LogNewCookedChoice(1, false, word_choice);
    } else {
      // No labelled blobs: report the word when debugging and free the choice
      // that was allocated above.
      if (applybox_debug > 0) {
        tprintf("APPLY_BOXES: Unlabelled word at :");
      delete word_choice;
  // Second pass: reset the begin-of-line and end-of-line flags.
  // NOTE(review): pr_it is reused here without a visible restart; confirm the
  // full source rewinds the iterator before this loop.
  for (; (word_res = pr_it.word()) != NULL; pr_it.forward()) {
    // Denormalize back to a BoxWord.
    // A word begins (ends) a line when the previous (next) row differs from
    // its own row.
    word_res->word->set_flag(W_BOL, pr_it.prev_row() != pr_it.row());
    word_res->word->set_flag(W_EOL, pr_it.next_row() != pr_it.row());
  // Summary output, printed only when applybox_debug is positive.
  if (applybox_debug > 0) {
    tprintf("   Found %d good blobs.\n", ok_blob_count);
    if (bad_blob_count > 0) {
      tprintf("   Leaving %d unlabelled blobs in %d words.\n",
              bad_blob_count, ok_word_count);
    if (unlabelled_words > 0)
      tprintf("   %d remaining unlabelled words deleted.\n", unlabelled_words);
예제 #6
/// Calls #LearnWord to extract features for labelled blobs within each word.
/// Features are stored in an internal buffer.
///
/// @param fontname  Font name; its C string is passed to LearnWord.
/// @param page_res  Page results whose words are visited in order.
///
/// NOTE(review): the closing braces are missing from this excerpt, and no
/// increment of word_count is visible, so the message below would report 0 as
/// written here; confirm against the full source.
void Tesseract::ApplyBoxTraining(const STRING& fontname, PAGE_RES* page_res) {
  PAGE_RES_IT pr_it(page_res);
  int word_count = 0;
  // Visit every word on the page, front to back.
  for (WERD_RES *word_res = pr_it.word(); word_res != NULL;
       word_res = pr_it.forward()) {
    LearnWord(fontname.string(), word_res);
  tprintf("Generated training data for %d words\n", word_count);
예제 #7
/**
 *  do_re_display()
 *  Redisplay page
 */
// Redraws the current page results in image_win.
//
//   word_painter  Pointer to a Tesseract member function taking a
//                 PAGE_RES_IT*. It is not called in the lines visible here;
//                 presumably the full source invokes it for each word.
//
// NOTE(review): closing braces (and probably further statements) are missing
// from this excerpt; restore them from the full source before building.
void Tesseract::do_re_display(
    BOOL8 (tesseract::Tesseract::*word_painter)(PAGE_RES_IT* pr_it)) {
  // Passed to plot() and incremented for each new block that is drawn.
  int block_count = 1;

  // Draw the binary page image first when display_image is non-zero.
  if (display_image != 0) {
    image_win->Image(pix_binary_, 0, 0);

  PAGE_RES_IT pr_it(current_page_res);
  for (WERD_RES* word = pr_it.word(); word != NULL; word = pr_it.forward()) {
    // Plot a baseline once per row: only when the row differs from the
    // previous word's row.
    if (display_baselines && pr_it.row() != pr_it.prev_row())
      pr_it.row()->row->plot_baseline(image_win, ScrollView::GREEN);
    // Likewise plot each block once, when the block changes.
    if (display_blocks && pr_it.block() != pr_it.prev_block())
      pr_it.block()->block->plot(image_win, block_count++, ScrollView::RED);
예제 #8
/**
 * Creates a fake best_choice entry in each WERD_RES with the correct text.
 *
 * @param page_res  Page results whose words are visited in order.
 *
 * NOTE(review): this excerpt is damaged: the WERD_CHOICE constructor call is
 * cut after its first argument, the call that ends in "0.0f, 0.0f);" has lost
 * its beginning, and the closing braces are missing. Restore them from the
 * full source before building.
 */
void Tesseract::CorrectClassifyWords(PAGE_RES* page_res) {
  PAGE_RES_IT pr_it(page_res);
  for (WERD_RES *word_res = pr_it.word(); word_res != NULL;
       word_res = pr_it.forward()) {
    WERD_CHOICE* choice = new WERD_CHOICE(word_res->uch_set,
    // One correct_text entry is processed per iteration.
    for (int i = 0; i < word_res->correct_text.size(); ++i) {
      // The part before the first space is the real ground truth, and the
      // rest is the bounding box location and page number.
      GenericVector<STRING> tokens;
      word_res->correct_text[i].split(' ', &tokens);
      // Only the first token is looked up in the unicharset.
      UNICHAR_ID char_id = unicharset.unichar_to_id(tokens[0].string());
                                                0.0f, 0.0f);
    // Log the assembled choice on the word.
    word_res->LogNewCookedChoice(1, false, choice);
예제 #9
// Formats a description of what lies under the point (x, y) into a local
// message buffer: the point itself, a baseline value for a row that contains
// it, and the boxes of any word and blob that contain it.
//
//   page_res  Page results to search.
//   x, y      Coordinates of the point of interest.
//
// NOTE(review): this excerpt is damaged: the "BL(x)" sprintf call is cut, the
// blob loop has lost its termination condition, the closing braces are
// missing, and whatever consumes msg is not visible. Restore them from the
// full source before building.
void show_point(PAGE_RES* page_res, float x, float y) {
  FCOORD pt(x, y);
  PAGE_RES_IT pr_it(page_res);

  // NOTE(review): every sprintf below appends at msg_ptr with no check
  // against kBufsize; a point inside many boxes could overrun msg. Consider
  // snprintf with the remaining length.
  const int kBufsize = 512;
  char msg[kBufsize];
  char *msg_ptr = msg;

  msg_ptr += sprintf(msg_ptr, "Pt:(%0.3f, %0.3f) ", x, y);

  for (WERD_RES* word = pr_it.word(); word != NULL; word = pr_it.forward()) {
    // Report each row at most once: only on the first word of a row, and only
    // when the row's bounding box contains the point.
    if (pr_it.row() != pr_it.prev_row() &&
        pr_it.row()->row->bounding_box().contains(pt)) {
      msg_ptr += sprintf(msg_ptr, "BL(x)=%0.3f ",
    // Report the word box, then look inside the word for blobs.
    if (word->word->bounding_box().contains(pt)) {
      TBOX box = word->word->bounding_box();
      msg_ptr += sprintf(msg_ptr, "Wd(%d, %d)/(%d, %d) ",
                         box.left(), box.bottom(),
                         box.right(), box.top());
      C_BLOB_IT cblob_it(word->word->cblob_list());
      for (cblob_it.mark_cycle_pt();
           cblob_it.forward()) {
        C_BLOB* cblob = cblob_it.data();
        // box is reused to hold the blob's bounding box from here on.
        box = cblob->bounding_box();
        if (box.contains(pt)) {
          msg_ptr += sprintf(msg_ptr,
                             "CBlb(%d, %d)/(%d, %d) ",
                             box.left(), box.bottom(),
                             box.right(), box.top());
예제 #10
/// Resegments the words by running the classifier in an attempt to find the
/// correct segmentation that produces the required string.
///
/// @param page_res  Page results whose words are visited in order.
///
/// NOTE(review): both tprintf calls are cut after their format string and the
/// closing braces are missing from this excerpt; restore them from the full
/// source before building.
void Tesseract::ReSegmentByClassification(PAGE_RES* page_res) {
  PAGE_RES_IT pr_it(page_res);
  WERD_RES* word_res;
  for (; (word_res = pr_it.word()) != NULL; pr_it.forward()) {
    WERD* word = word_res->word;
    if (word->text() == NULL || word->text()[0] == '\0')
      continue;  // Ignore words that have no text.
    // Convert the correct text to a vector of UNICHAR_ID
    GenericVector<UNICHAR_ID> target_text;
    // A failed conversion is reported through tprintf.
    if (!ConvertStringToUnichars(word->text(), &target_text)) {
      tprintf("APPLY_BOX: FAILURE: can't find class_id for '%s'\n",
    // Likewise, a failure to find a segmentation for the target text is
    // reported through tprintf.
    if (!FindSegmentation(target_text, word_res)) {
      tprintf("APPLY_BOX: FAILURE: can't find segmentation for '%s'\n",
// For each point of the second input, finds the matching primitives in gdp
// (by group name, 'name' attribute or 'class' attribute, depending on mode),
// computes their centroid with the chosen method, and transforms them with a
// matrix built from that point.
//
//   t       Evaluation time passed to the parameter accessors.
//   mode    0 = match by group, 1 = match by 'name', 2 = match by 'class'.
//   method  1 = bounding box center, 2 = center of mass, otherwise barycenter.
//
// Returns 1 after adding an error (invalid mode or missing attribute) and 0
// on success.
//
// NOTE(review): this excerpt is heavily damaged: the return-type line, all
// braces, the switch's break/default lines, the continue statements, the else
// branches, the handle bindings and several loop bodies are missing. Restore
// them from the full source before building.
SOP_PrimGroupCentroid::bindToCentroids(fpreal t, int mode, int method)
    int                         behavior;
    exint                       int_value;

    const GA_PrimitiveGroup     *group;
    GA_PrimitiveGroup           *all_prims, *temp_group;
    GA_Range                    pr_range;
    GA_ROAttributeRef           attr_gah, primattr_gah;
    GA_ROHandleI                class_h;
    GA_ROHandleS                str_h;

    const GU_Detail             *input_geo;

    UT_Matrix4                  mat;
    UT_String                   attr_name, pattern, str_value;
    UT_Vector3                  pos;

    // Get the second input geometry as read only.
    GU_DetailHandleAutoReadLock gdl(inputGeoHandle(1));
    input_geo = gdl.getGdp();

    // Get the unmatched geometry behavior.
    behavior = BEHAVIOR(t);

    // Create a new attribute reference map.
    GA_AttributeRefMap          hmap(*gdp, input_geo);

    // Get the attribute selection string.
    BIND(pattern, t);

    // If we have a pattern, try to build the ref map.
    if (pattern.length() > 0)
        buildRefMap(hmap, pattern, gdp, input_geo, mode, GA_ATTRIB_POINT);

    // The list of GA_Primitives in the input geometry.
    const GA_PrimitiveList &prim_list = gdp->getPrimitiveList();

    // Create a temporary primitive group so we can keep track of all the
    // primitives we have modified.
    all_prims = createAdhocPrimGroup(*gdp, "allprims");

    // Determine which attribute we need from the points, based on the mode.
    // NOTE(review): the break statements and the default label are missing
    // from this excerpt; as written every case would fall through.
    switch (mode)
        case 0:
            attr_name = "group";
        case 1:
            attr_name = "name";
        case 2:
            attr_name = "class";
            addError(SOP_MESSAGE, "Invalid mode setting");
            return 1;

    // Find the attribute.
    attr_gah = input_geo->findPointAttribute(attr_name);

    // If there is no attribute, add an error message and quit.
    if (attr_gah.isInvalid())
        addError(SOP_ATTRIBUTE_INVALID, attr_name);
        return 1;

    // If not using groups, we need to check if the matching primitive
    // attribute exists on the geometry.
    if (mode != 0)
        // Try to find the attribute.
        primattr_gah = gdp->findPrimitiveAttribute(attr_name);

        // If there is no attribute, add an error message and quit.
        if (primattr_gah.isInvalid())
            addError(SOP_ATTRIBUTE_INVALID, attr_name);
            return 1;

    // 'class' uses the int handle.
    // NOTE(review): the statements binding class_h and str_h are missing.
    if (mode == 2)
    // Groups and 'name' use the string handle.

    // Process each point of the second input in turn.
    for (GA_Iterator it(input_geo->getPointRange()); !it.atEnd(); ++it)
        if (mode == 0)
            // Get the unique string value.
            str_value = str_h.get(*it);

            // Find the group on the geometry to bind.
            group = gdp->findPrimitiveGroup(str_value);

            // Ignore non-existent groups.
            // NOTE(review): the statement guarded by this test is missing;
            // without it the next line would dereference a null group.
            if (!group)

            // Skip empty groups.
            if (group->isEmpty())

            // The primitives in the group.
            pr_range = gdp->getPrimitiveRange(group);
            // NOTE(review): the else that separates the group branch from the
            // attribute branches below is missing from this excerpt.
            if (mode == 1)
                // Get the unique string value.
                str_value = str_h.get(*it);
                // Get the prims with that string value.
                pr_range = gdp->getRangeByValue(primattr_gah, str_value);
                // Get the unique integer value.
                int_value = class_h.get(*it);
                // Get the prims with that integer value.
                pr_range = gdp->getRangeByValue(primattr_gah, int_value);
            // Create an adhoc group.
            temp_group = createAdhocPrimGroup(*gdp);

        // Add the primitives in the range to the groups.
        // NOTE(review): the statements that do this are missing.

        // Bounding Box
        if (method == 1)
            // Calculate the bounding box center for this range.
            boundingBox(gdp, pr_range, prim_list, pos);
        // Center of Mass
        else if (method == 2)
            // Calculate the center of mass for this attribute value.
            centerOfMass(pr_range, prim_list, pos);
        // Barycenter
            // Calculate the barycenter for this attribute value.
            baryCenter(gdp, pr_range, prim_list, pos);

        // Build the transform from the point information.
        buildTransform(mat, input_geo, pos, *it);

        // Transform the geometry from the centroid.
        // In group mode the found group is transformed; the second call uses
        // the adhoc group (the else between the two calls is missing here).
        if (mode == 0)
            gdp->transform(mat, group);
            gdp->transform(mat, temp_group);

        // Copy any necessary attributes from the incoming points to the
        // geometry.
        // NOTE(review): the body of the copy loop is missing.
        if (hmap.entries())
            for (GA_Iterator pr_it(pr_range); !pr_it.atEnd(); ++pr_it)

    // We want to destroy prims that didn't have a matching name/group.
    if (behavior)
        // Flip the membership of all the prims that we did see.
        // NOTE(review): the statement that flips all_prims is missing.

        // Destroy the ones that we didn't.
        gdp->deletePrimitives(*all_prims, true);

    return 0;
예제 #12
// Cooks the rain SOP: evaluates the node parameters at the context time,
// builds a RainData from them, generates the points once, and shifts the
// point positions of every primitive in gdp.
//
//   context  Cook context; its time is used to evaluate the parameters.
//
// Returns the node's current error state via error().
//
// NOTE(review): this excerpt is damaged: the return-type line, all braces, the
// bodies of the two rain-state tests and the remainder of the primitive loop
// header are missing. Restore them from the full source before building.
SOP_Rain::cookMySop(OP_Context &context)
    //UT_Interrupt    *boss;
    // Only do the work when no abort-level error has been raised so far.
    if (error() < UT_ERROR_ABORT)
        //boss = UTgetInterrupt();
        //boss->opStart("Start generating rain");
        fpreal now = TIME(context.getTime());
        long nPoints = NPOINTS( now );
        // NOTE(review): rainDirection is not used in the lines visible here.
        UT_Vector3 rainDirection = RAINDIRECTION(now);
        //rainDirection.normalize(); //TODO: check for (0,0,0) vector

        // Collect the parameter values, evaluated at the current time, in a
        // RainData object.
        RainData rain(  now,
                        nPoints, BOUNDMIN (now), BOUNDMAX (now),
                        DICEMIN(now), DICEMAX(now), SEED(now),
                        SPEED (now),
                        SPEEDVARIENCE (now));

        // NOTE(review): the bodies of the next two tests are missing;
        // presumably they allocate and then fill or refresh the rain data --
        // confirm against the full source.
        if(rain.getAllocationState() == false || isPointsNumberChanged_ == true)
        if( rain.getAllocationState() == true && 
            ( rain.getCachedState() == false || isParameterChanged_ == true ) )

        // Points are generated only while isPointsGenerated_ is false; the
        // flag is set to true right after generatePoints runs.
        if (isPointsGenerated_ == false)
            printf("Generate Points procedure\n");

            generatePoints(gdp, nPoints);
            isPointsGenerated_ = true;

        // For each primitive, pass the range of its points to
        // rain.shiftPositions.
        for (   GA_Iterator pr_it(gdp->getPrimitiveRange());
            GEO_Primitive* prim = gdp->getGEOPrimitive(*pr_it);
            GA_Range range = prim->getPointRange();
            rain.shiftPositions( gdp, range);            


    // Clear the change flags before returning.
    isParameterChanged_ = false;
    isPointsNumberChanged_ = false;
    return error();