Ejemplo n.º 1
0
// Returns the list of WORD zones selected by target_rect.
//
// The selection is made in three steps:
//   1. If the page has PARAGRAPH zones, the paragraph whose intersection
//      with target_rect covers the largest fraction of the paragraph's own
//      area is chosen, and only its lines are considered.  If the page has
//      no PARAGRAPH zones at all (the OCR engine did not produce them, or
//      the file was modified by the user), we can only make a rough guess
//      and consider every LINE zone of the page.
//   2. Among the candidate LINE zones, a line is kept when its overlap with
//      target_rect is taller than half of the line's own height.
//   3. Words are collected from the kept lines:
//        - a single kept line contributes the words intersecting target_rect;
//        - with several kept lines, the first one contributes the words from
//          the first one intersecting target_rect up to the end of the line,
//          the last one contributes the words from the start of the line up
//          to the last one intersecting target_rect, and every line in
//          between contributes all of its words.
//
// Words are appended line by line, and inside each line in the order in
// which get_zones() reports them.
//
// The list is empty when no paragraph, line or word is hit by target_rect.
// Note that the `text' argument is not modified by this function.
GList<DjVuTXT::Zone *>
DjVuTXT::find_text_in_rect(GRect target_rect, GUTF8String &text) const
{
   GList<Zone *> zone_list;
   GList<Zone *> lines;

   // Zone whose LINE descendants are candidates for the selection.
   // Defaults to the whole page; 0 means that nothing is selected.
   const Zone *line_parent = &page_zone;

   get_zones((int)PARAGRAPH, &page_zone, zone_list);
   if (!zone_list.isempty())
   {
      // Pick the paragraph with the largest covered fraction of its area.
      GPosition best_pos = zone_list;
      float best_ratio = 0;
      for (GPosition pos = zone_list; pos; ++pos)
      {
         GRect rect = zone_list[pos]->rect;
         const int full_area = rect.area();
         // intersect() overwrites rect with the overlapping part.
         if (rect.intersect(rect, target_rect))
         {
            const float ratio = rect.area() / (float)full_area;
            if (!best_ratio || best_ratio < ratio)
            {
               best_ratio = ratio;
               best_pos = pos;
            }
         }
      }
      line_parent = 0;
      if (best_ratio > 0)
         line_parent = zone_list[best_pos];
      zone_list.empty();
   }

   if (line_parent)
   {
      // zone_list is empty at this point in both cases.
      get_zones((int)LINE, line_parent, zone_list);
      for (GPosition pos = zone_list; pos; ++pos)
      {
         GRect rect = zone_list[pos]->rect;
         const int half_height = rect.height() / 2;
         // Keep the line when more than half of its height is covered.
         if (rect.intersect(rect, target_rect) && rect.height() > half_height)
            lines.append(zone_list[pos]);
      }
   }

   // From now on zone_list holds the result.
   zone_list.empty();

   const int line_count = lines.size();
   int line_no = 1;
   GList<Zone *> words;
   for (GPosition lpos = lines; lpos; ++lpos, ++line_no)
   {
      words.empty();
      get_zones((int)WORD, lines[lpos], words);

      if (line_count == 1)
      {
         // Only one line: take exactly the words touched by target_rect.
         for (GPosition p = words; p; ++p)
         {
            GRect rect = words[p]->rect;
            if (rect.intersect(rect, target_rect))
               zone_list.append(words[p]);
         }
      }
      else if (line_no == 1)
      {
         // First line: skip the leading words until one touches target_rect,
         // then take everything up to the end of the line.
         bool started = false;
         for (GPosition p = words; p; ++p)
         {
            if (!started)
            {
               GRect rect = words[p]->rect;
               if (rect.intersect(rect, target_rect))
                  started = true;
            }
            if (started)
               zone_list.append(words[p]);
         }
      }
      else if (line_no == line_count)
      {
         // Last line: take the words from the start of the line up to the
         // last one touching target_rect.  The last touching word is located
         // by scanning backward, but the words are appended front to back so
         // that they do not end up reversed in the result.
         int keep = 0;               // number of leading words to take
         int index = words.size();   // 1-based index of the word at p
         for (GPosition p = words.lastpos(); p; --p, --index)
         {
            GRect rect = words[p]->rect;
            if (rect.intersect(rect, target_rect))
            {
               keep = index;
               break;
            }
         }
         GPosition p = words;
         for (int k = 0; p && k < keep; ++p, ++k)
            zone_list.append(words[p]);
      }
      else
      {
         // Lines strictly between the first and the last are taken entirely.
         for (GPosition p = words; p; ++p)
            zone_list.append(words[p]);
      }
   }

   return zone_list;
}