예제 #1
0
// Scores a blob by the longest bounding-box side found among its outlines.
// The score is doubled when the blob has more than 5 outlines, and halved
// when the blob's bounding box has bottom() above 4 * kBlnBaselineOffset or
// top() below kBlnBaselineOffset / 2.
// All intermediate arithmetic is done in inT16; the result is returned as
// float.
float Tesseract::blob_noise_score(TBLOB *blob) {
  inT16 num_outlines = 0;
  inT16 biggest_side = 0;

  TESSLINE* ol = blob->outlines;
  while (ol != NULL) {
    ++num_outlines;
    TBOX ol_box = ol->bounding_box();
    // Longer of the two sides of this outline's bounding box.
    inT16 longer_side =
        (ol_box.height() > ol_box.width()) ? ol_box.height() : ol_box.width();
    if (biggest_side < longer_side) {
      biggest_side = longer_side;
    }
    ol = ol->next;
  }

  // Many outlines in a single blob: double the score.
  if (num_outlines > 5) {
    biggest_side *= 2;
  }

  // Blob sitting very high or very low relative to the baseline offset:
  // be more lenient and halve the score.
  TBOX blob_box = blob->bounding_box();
  if (blob_box.bottom() > kBlnBaselineOffset * 4 ||
      blob_box.top() < kBlnBaselineOffset / 2) {
    biggest_side /= 2;
  }

  return biggest_side;
}
예제 #2
0
파일: blobs.cpp 프로젝트: 11110101/tess-two
// Configures this blob's own DENORM (denorm_) and then normalizes the blob
// in place through it.
// The first nine arguments are forwarded unchanged to
// DENORM::SetupNormalization (see that function for their meaning); inverse
// and pix are stored on denorm_ via set_inverse() and set_pix().
void TBLOB::Normalize(const BLOCK* block,
                      const FCOORD* rotation,
                      const DENORM* predecessor,
                      float x_origin, float y_origin,
                      float x_scale, float y_scale,
                      float final_xshift, float final_yshift,
                      bool inverse, Pix* pix) {
  denorm_.SetupNormalization(block, rotation, predecessor,
                             x_origin, y_origin,
                             x_scale, y_scale,
                             final_xshift, final_yshift);
  denorm_.set_inverse(inverse);
  denorm_.set_pix(pix);
  // TODO(rays) Per-outline normalization (the disabled branch below) is more
  // accurate, but it changes results enough to break tests; re-enable it
  // together with a retraining.
  // Why it is troublesome: it normalizes against the baseline value computed
  // independently at each x-coord. With a non-horizontal baseline that
  // shears the normalized blob. The shear helps in the rare case of a truly
  // curved baseline, but the rest of the time the baselines need to be
  // stabilized first.
#if 0
  TESSLINE* outline = outlines;
  while (outline != NULL) {
    outline->Normalize(denorm_);
    outline = outline->next;
  }
#else
  denorm_.LocalNormBlob(this);
#endif
}
예제 #3
0
/**********************************************************************
 * TBLOB::bounding_box()
 *
 * Returns the union of the bounding boxes of every top-level outline in
 * this (possibly compound) blob. A blob with no outlines yields
 * TBOX(0, 0, 0, 0).
 **********************************************************************/
TBOX TBLOB::bounding_box() const {
  TESSLINE* current = outlines;
  if (current == nullptr) {
    return TBOX(0, 0, 0, 0);
  }
  // Seed with the first outline's box, then grow it with the others.
  TBOX merged = current->bounding_box();
  while ((current = current->next) != nullptr) {
    merged += current->bounding_box();
  }
  return merged;
}
예제 #4
0
파일: blobs.cpp 프로젝트: ErwcPKerr/node-dv
// Normalizes this blob in place using the supplied DENORM.
void TBLOB::Normalize(const DENORM& denorm) {
  // The per-outline branch is the one compiled in here (#if 1); the
  // LocalNormBlob alternative is retained in the #else branch.
  // TODO(rays) note carried over from the original: outline->Normalize is
  // more accurate, but breaks tests due to the changes it makes, so it needs
  // a retraining to go with it.
#if 1
  TESSLINE* outline = outlines;
  while (outline != NULL) {
    outline->Normalize(denorm);
    outline = outline->next;
  }
#else
  denorm.LocalNormBlob(this);
#endif
}
예제 #5
0
파일: seam.cpp 프로젝트: 11110101/tess-two
/**
 * @name find_split_in_blob
 *
 * Checks whether both end points of a split fall inside outlines of a blob.
 * The two points need not be contained by the same outline.
 *
 * @param split Split whose point1->pos and point2->pos are tested.
 * @param blob  Blob whose outline list is searched.
 * @returns true if some outline contains point1 and some outline contains
 *          point2; false otherwise.
 */
bool find_split_in_blob(SPLIT *split, TBLOB *blob) {
  // Pass 1: point1 must lie inside at least one outline.
  TESSLINE *candidate = blob->outlines;
  while (candidate != NULL && !candidate->Contains(split->point1->pos)) {
    candidate = candidate->next;
  }
  if (candidate == NULL) {
    return false;
  }

  // Pass 2: restart from the head and look for an outline holding point2.
  candidate = blob->outlines;
  while (candidate != NULL) {
    if (candidate->Contains(split->point2->pos)) {
      return true;
    }
    candidate = candidate->next;
  }
  return false;
}
예제 #6
0
파일: blobs.cpp 프로젝트: dqsoft/tesseract
// Finds and deletes any duplicate outlines in this blob, without deleting
// their EDGEPTs.
void TBLOB::EliminateDuplicateOutlines() {
  for (TESSLINE* outline = outlines; outline != nullptr; outline = outline->next) {
    TESSLINE* last_outline = outline;
    for (TESSLINE* other_outline = outline->next; other_outline != nullptr;
         last_outline = other_outline, other_outline = other_outline->next) {
      if (outline->SameBox(*other_outline)) {
        last_outline->next = other_outline->next;
        // This doesn't leak - the outlines share the EDGEPTs.
        other_outline->loop = nullptr;
        delete other_outline;
        other_outline = last_outline;
        // If it is part of a cut, then it can't be a hole any more.
        outline->is_hole = false;
      }
    }
  }
}
예제 #7
0
파일: intfx.cpp 프로젝트: dqsoft/tesseract
// Extracts sets of 3-D features of length kStandardFeatureLength (=12.8), as
// (x,y) position and angle as measured counterclockwise from the vector
// <-1, 0>, from blob using two normalizations defined by bl_denorm and
// cn_denorm. See SetupBLCNDenorms for definitions.
// If outline_cn_counts is not nullptr, on return it contains the cumulative
// number of cn features generated for each outline in the blob (in order).
// Thus after the first outline, there were (*outline_cn_counts)[0] features,
// after the second outline, there were (*outline_cn_counts)[1] features etc.
// Note that an outline for which FindBestStartPt() returns nullptr is skipped
// entirely and contributes no entry to outline_cn_counts.
//
// Parameters:
//   blob              - blob whose outlines are walked.
//   nonlinear_norm    - forwarded to SetupBLCNDenorms.
//   bl_features       - receives the features extracted with bl_denorm;
//                       dereferenced unconditionally, so must not be null.
//   cn_features       - receives the features extracted with cn_denorm;
//                       dereferenced unconditionally, so must not be null.
//   results           - passed to SetupBLCNDenorms, then NumBL, NumCN,
//                       YBottom, YTop and Width are written here; must not
//                       be null.
//   outline_cn_counts - optional; emptied on entry and filled as described
//                       above.
void Classify::ExtractFeatures(const TBLOB& blob,
                               bool nonlinear_norm,
                               GenericVector<INT_FEATURE_STRUCT>* bl_features,
                               GenericVector<INT_FEATURE_STRUCT>* cn_features,
                               INT_FX_RESULT_STRUCT* results,
                               GenericVector<int>* outline_cn_counts) {
  DENORM bl_denorm, cn_denorm;
  tesseract::Classify::SetupBLCNDenorms(blob, nonlinear_norm,
                                        &bl_denorm, &cn_denorm, results);
  // Start from an empty count list so entries line up with this call only.
  if (outline_cn_counts != nullptr)
    outline_cn_counts->truncate(0);
  // Iterate the outlines.
  for (TESSLINE* ol = blob.outlines; ol != nullptr; ol = ol->next) {
    // Iterate the polygon.
    EDGEPT* loop_pt = ol->FindBestStartPt();
    EDGEPT* pt = loop_pt;
    // No usable start point: skip this outline (and its count entry).
    if (pt == nullptr) continue;
    do {
      // In a do-while, continue jumps to the loop condition below, so pt
      // still advances to pt->next when a hidden point is skipped.
      if (pt->IsHidden()) continue;
      // Find a run of equal src_outline.
      EDGEPT* last_pt = pt;
      do {
        last_pt = last_pt->next;
      } while (last_pt != loop_pt && !last_pt->IsHidden() &&
               last_pt->src_outline == pt->src_outline);
      // The inner loop stops one point past the run; step back so last_pt
      // is the final point that still belongs to it.
      last_pt = last_pt->prev;
      // Until the adaptive classifier can be weaned off polygon segments,
      // we have to force extraction from the polygon for the bl_features.
      ExtractFeaturesFromRun(pt, last_pt, bl_denorm, kStandardFeatureLength,
                             true, bl_features);
      ExtractFeaturesFromRun(pt, last_pt, cn_denorm, kStandardFeatureLength,
                             false, cn_features);
      // Resume after the run; the loop condition moves on to last_pt->next.
      pt = last_pt;
    } while ((pt = pt->next) != loop_pt);
    // Record the running total of cn features after this outline.
    if (outline_cn_counts != nullptr)
      outline_cn_counts->push_back(cn_features->size());
  }
  // Summarize feature counts and the blob's bounding-box geometry.
  results->NumBL = bl_features->size();
  results->NumCN = cn_features->size();
  results->YBottom = blob.bounding_box().bottom();
  results->YTop = blob.bounding_box().top();
  results->Width = blob.bounding_box().width();
}
예제 #8
0
파일: blobs.cpp 프로젝트: 11110101/tess-two
// Wraps a circular list of EDGEPTs in a newly allocated TESSLINE, which
// takes the list as its loop. The caller receives the new TESSLINE.
TESSLINE* TESSLINE::BuildFromOutlineList(EDGEPT* outline) {
  TESSLINE* line = new TESSLINE;
  line->loop = outline;
  if (outline->src_outline != NULL) {
    // ASSUMPTION: the only caller is ApproximateOutline, so either every
    // point carries a src_outline or none of them does; checking the first
    // point is therefore enough.
    // In the same way that SetupFromPos derives the vectors from the
    // vertices, fill in each step_count with the (positive) number of
    // original C_OUTLINE steps from that vertex to the next one.
    EDGEPT* vertex = outline;
    do {
      EDGEPT* following = vertex->next;
      vertex->step_count = following->start_step - vertex->start_step;
      // A negative difference means the run wrapped past the end of the
      // source outline; add its path length to make the count positive.
      if (vertex->step_count < 0) {
        vertex->step_count += vertex->src_outline->pathlength();
      }
      vertex = following;
    } while (vertex != outline);
  }
  line->SetupFromPos();
  return line;
}
예제 #9
0
// Wraps a circular list of EDGEPTs in a newly allocated TESSLINE: the list
// becomes the TESSLINE's loop and SetupFromPos() is run on it before the
// new object is handed back to the caller.
TESSLINE* TESSLINE::BuildFromOutlineList(EDGEPT* outline) {
  TESSLINE* line = new TESSLINE;
  line->loop = outline;
  line->SetupFromPos();
  return line;
}
예제 #10
0
// Plots every outline of this blob in the given window, forwarding color
// and child_color to each outline's own plot().
void TBLOB::plot(ScrollView* window, ScrollView::Color color,
                 ScrollView::Color child_color) {
  TESSLINE* current = outlines;
  while (current != NULL) {
    current->plot(window, color, child_color);
    current = current->next;
  }
}
예제 #11
0
// Recomputes the bounding boxes of the outlines.
void TBLOB::ComputeBoundingBoxes() {
  for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next) {
    outline->ComputeBoundingBox();
  }
}
예제 #12
0
// Scales by the given factor in place.
void TBLOB::Scale(float factor) {
  for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next) {
    outline->Scale(factor);
  }
}
예제 #13
0
// Moves by the given vec in place.
void TBLOB::Move(const ICOORD vec) {
  for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next) {
    outline->Move(vec);
  }
}
예제 #14
0
// Rotates by the given rotation in place.
void TBLOB::Rotate(const FCOORD rotation) {
  for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next) {
    outline->Rotate(rotation);
  }
}