Пример #1
0
float filter_noise_blobs(                            //separate noise
                         BLOBNBOX_LIST *src_list,    //origonal list
                         BLOBNBOX_LIST *noise_list,  //noise list
                         BLOBNBOX_LIST *small_list,  //small blobs
                         BLOBNBOX_LIST *large_list   //large blobs
                        ) {
  INT16 height;                  //height of blob
  INT16 width;                   //of blob
  BLOBNBOX_IT src_it = src_list; //iterators
  BLOBNBOX_IT noise_it = noise_list;
  BLOBNBOX_IT small_it = small_list;
  BLOBNBOX_IT large_it = large_list;
  STATS size_stats (0, MAX_NEAREST_DIST);
  //blob heights
  if (textord_new_initial_xheight)
    return filter_noise_blobs2 (src_list, noise_list, small_list, large_list);
  float min_y;                   //size limits
  float max_y;
  float max_x;

  for (src_it.mark_cycle_pt (); !src_it.cycled_list (); src_it.forward ()) {
    if (src_it.data ()->bounding_box ().height () < textord_max_noise_size)
      noise_it.add_after_then_move (src_it.extract ());
  }
  for (src_it.mark_cycle_pt (); !src_it.cycled_list (); src_it.forward ()) {
    size_stats.add (src_it.data ()->bounding_box ().height (), 1);
  }
  min_y = floor (size_stats.ile (textord_blob_size_smallile / 100.0));
  max_y = ceil (size_stats.ile (textord_blob_size_bigile / 100.0));
  max_x = ceil (size_stats.ile (0.5) * textord_width_limit);
  for (src_it.mark_cycle_pt (); !src_it.cycled_list (); src_it.forward ()) {
    height = src_it.data ()->bounding_box ().height ();
    width = src_it.data ()->bounding_box ().width ();
    if (height < min_y)
      small_it.add_after_then_move (src_it.extract ());
    else if (height > max_y || width > max_x)
      large_it.add_after_then_move (src_it.extract ());
  }
  return size_stats.ile (textord_initialx_ile);
}
Пример #2
0
std::string cache_manager::stat_json() const {
	rapidjson::Document doc;
	doc.SetObject();
	auto &allocator = doc.GetAllocator();

	rapidjson::Value total_cache(rapidjson::kObjectType);

	rapidjson::Value size_stats(rapidjson::kObjectType);
	get_total_caches_size_stats_json(size_stats, allocator);

	total_cache.AddMember("size_stats", size_stats, allocator);
	doc.AddMember("total_cache", total_cache, allocator);

	rapidjson::Value caches(rapidjson::kObjectType);
	get_caches_size_stats_json(caches, allocator);
	doc.AddMember("caches", caches, allocator);

	rapidjson::StringBuffer buffer;
	rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
	doc.Accept(writer);
	return buffer.GetString();
}
Пример #3
0
float Textord::filter_noise_blobs(
    BLOBNBOX_LIST *src_list,      // original list
    BLOBNBOX_LIST *noise_list,    // noise list
    BLOBNBOX_LIST *small_list,    // small blobs
    BLOBNBOX_LIST *large_list) {  // large blobs
  int16_t height;                  //height of blob
  int16_t width;                   //of blob
  BLOBNBOX *blob;                //current blob
  float initial_x;               //first guess
  BLOBNBOX_IT src_it = src_list; //iterators
  BLOBNBOX_IT noise_it = noise_list;
  BLOBNBOX_IT small_it = small_list;
  BLOBNBOX_IT large_it = large_list;
  STATS size_stats (0, MAX_NEAREST_DIST);
  //blob heights
  float min_y;                   //size limits
  float max_y;
  float max_x;
  float max_height;              //of good blobs

  for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
    blob = src_it.data();
    if (blob->bounding_box().height() < textord_max_noise_size)
      noise_it.add_after_then_move(src_it.extract());
    else if (blob->enclosed_area() >= blob->bounding_box().height()
      * blob->bounding_box().width() * textord_noise_area_ratio)
      small_it.add_after_then_move(src_it.extract());
  }
  for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
    size_stats.add(src_it.data()->bounding_box().height(), 1);
  }
  initial_x = size_stats.ile(textord_initialx_ile);
  max_y = ceil(initial_x *
               (tesseract::CCStruct::kDescenderFraction +
                tesseract::CCStruct::kXHeightFraction +
                2 * tesseract::CCStruct::kAscenderFraction) /
               tesseract::CCStruct::kXHeightFraction);
  min_y = floor (initial_x / 2);
  max_x = ceil (initial_x * textord_width_limit);
  small_it.move_to_first ();
  for (small_it.mark_cycle_pt (); !small_it.cycled_list ();
  small_it.forward ()) {
    height = small_it.data()->bounding_box().height();
    if (height > max_y)
      large_it.add_after_then_move(small_it.extract ());
    else if (height >= min_y)
      src_it.add_after_then_move(small_it.extract ());
  }
  size_stats.clear ();
  for (src_it.mark_cycle_pt (); !src_it.cycled_list (); src_it.forward ()) {
    height = src_it.data ()->bounding_box ().height ();
    width = src_it.data ()->bounding_box ().width ();
    if (height < min_y)
      small_it.add_after_then_move (src_it.extract ());
    else if (height > max_y || width > max_x)
      large_it.add_after_then_move (src_it.extract ());
    else
      size_stats.add (height, 1);
  }
  max_height = size_stats.ile (textord_initialasc_ile);
  //      tprintf("max_y=%g, min_y=%g, initial_x=%g, max_height=%g,",
  //              max_y,min_y,initial_x,max_height);
  max_height *= tesseract::CCStruct::kXHeightCapRatio;
  if (max_height > initial_x)
    initial_x = max_height;
  //      tprintf(" ret=%g\n",initial_x);
  return initial_x;
}