Ejemplo n.º 1
0
/**
 build a bitmap holding the edge of another bitmap

 The input is eroded n times; the eroded result is then removed (and-not)
 from a copy of the input, and that copy is returned.

 @param m the bitmap to take the edge of (not modified here)
 @param n number of erosion passes to apply; no pass is run when n <= 0
 @return a newly allocated bitmap that the caller must release with
   ho_bitmap_free, or NULL when a clone or erosion step fails
 */
ho_bitmap *
ho_bitmap_edge (const ho_bitmap * m, const int n)
{
  ho_bitmap *m_out;
  ho_bitmap *m_temp1;
  ho_bitmap *m_temp2;
  int i;

  m_temp1 = ho_bitmap_clone (m);
  if (!m_temp1)
    return NULL;

  /* erode n times, always releasing the previous generation */
  for (i = 0; i < n; i++)
  {
    m_temp2 = ho_bitmap_erosion (m_temp1);
    ho_bitmap_free (m_temp1);
    if (!m_temp2)
      return NULL;
    m_temp1 = m_temp2;
  }

  m_out = ho_bitmap_clone (m);
  if (!m_out)
  {
    /* do not leak the eroded bitmap when the output clone fails */
    ho_bitmap_free (m_temp1);
    return NULL;
  }

  /* carry position and font metrics over from the input explicitly
     (kept from the original code; whether ho_bitmap_clone already copies
     these fields is not visible here) */
  m_out->x = m->x;
  m_out->y = m->y;

  m_out->type = m->type;
  m_out->font_height = m->font_height;
  m_out->font_width = m->font_width;
  m_out->font_spacing = m->font_spacing;
  m_out->line_spacing = m->line_spacing;
  m_out->avg_line_fill = m->avg_line_fill;
  m_out->com_line_fill = m->com_line_fill;
  m_out->nikud = m->nikud;

  /* remove the eroded bitmap from the copy of the input */
  ho_bitmap_andnot (m_out, m_temp1);
  ho_bitmap_free (m_temp1);

  return m_out;
}
Ejemplo n.º 2
0
/**
 fill a text buffer with fonts recognized from a page layout

 Walks every block, line, word and font of the layout, recognizes each font
 image and appends the recognized text to the text buffer. In html mode each
 block is wrapped in an "ocr_par" div carrying its bounding box.

 @param l_page the page layout to recognize
 @param s_text_out the text buffer to fill
 @param font_options recognition options; the fields read here are
   font_code, do_linguistics and nikud (when nikud is non-zero the nikud,
   dagesh and shin marks of each font are recognized as well)
 @param html output format is html
 @param progress a progress indicator 0..100; it is written unconditionally
   and therefore must not be NULL
 @return FALSE on success; TRUE when the layout, text buffer or options are
   missing, or when a font bitmap could not be created (text appended before
   the failure stays in the buffer)
 */
int hocr_font_recognition( const ho_layout* l_page, ho_string* s_text_out, HEBOCR_FONT_OPTIONS *font_options, int html, int* progress )
{
  int block_index;
  int line_index;
  int word_index;
  int font_index;
  int current_font_number = 0;
  int number_of_fonts;
  ho_bitmap *m_text = NULL;
  ho_bitmap *m_mask = NULL;
  ho_bitmap *m_font_main_sign = NULL;
  ho_bitmap *m_font_nikud = NULL;

  char text_out[200];
  const char *font;
  const char *font_nikud;
  const char *font_dagesh;
  const char *font_shin;

  /* init progress */
  *progress = 0;

  /* did we get a text buffer, a layout and recognition options ? */
  if (!s_text_out || !l_page || !font_options)
    return TRUE;

  /* read the layout only after it is known to be non-NULL */
  number_of_fonts = l_page->number_of_fonts;

  /* loop over the layout */
  for (block_index = 0; block_index < l_page->n_blocks; block_index++)
  {
    /* start of paragraph */
    if (html)
    {
      /* snprintf bounds the write to the buffer, so an unexpectedly long
         expansion is truncated instead of overflowing text_out */
      snprintf (text_out, sizeof text_out,
        "    <div class=\"ocr_par\" id=\"par_%d\" title=\"bbox %d %d %d %d\">\n",
        block_index + 1, l_page->m_blocks_text[block_index]->x,
        l_page->m_blocks_text[block_index]->y,
        l_page->m_blocks_text[block_index]->x +
        l_page->m_blocks_text[block_index]->width,
        l_page->m_blocks_text[block_index]->y +
        l_page->m_blocks_text[block_index]->height);
      ho_string_cat (s_text_out, text_out);
    }

    for (line_index = 0; line_index < l_page->n_lines[block_index];
      line_index++)
    {
      /* start of line */

      /* loop on all the words in this line */
      for (word_index = 0;
        word_index < l_page->n_words[block_index][line_index]; word_index++)
      {
        int word_length = l_page->n_fonts[block_index][line_index][word_index];

        unsigned char word_end = FALSE;

        unsigned char word_start = TRUE;

        int last_char_i = 0;

        int char_i = 0;

        /* start of word */
        for (font_index = 0; font_index < word_length; font_index++)
        {
          word_end = (font_index == (word_length - 1));
          word_start = (font_index == 0);

          /* get font images */

          /* get the font */
          m_text =
            ho_layout_get_font_text (l_page, block_index,
            line_index, word_index, font_index);
          if (!m_text)
            goto fail;

          /* get font line mask */
          m_mask =
            ho_layout_get_font_line_mask (l_page, block_index,
            line_index, word_index, font_index);
          if (!m_mask)
            goto fail;

          /* get font main sign */
          m_font_main_sign = ho_font_main_sign (m_text, m_mask);
          if (!m_font_main_sign)
            goto fail;

          /* recognize font from images; char_i is updated by the
             recognizer and fed back as last_char_i for the next font */
          last_char_i = char_i;
          font =
            ho_recognize_font (m_font_main_sign, m_mask,
            font_options->font_code, font_options->do_linguistics,
            word_end, word_start, &char_i, last_char_i);

          /* insert font to text out */
          ho_string_cat (s_text_out, font);

          /* get font nikud */
          if (font_options->nikud)
          {
            /* nikud image = font image without its main sign */
            m_font_nikud = ho_bitmap_clone (m_text);
            if (!m_font_nikud)
              goto fail;
            ho_bitmap_andnot (m_font_nikud, m_font_main_sign);

            /* recognize font from images */
            font_nikud = ho_recognize_nikud (m_font_nikud, m_mask,
              font_options->font_code, &font_dagesh, &font_shin);

            /* free bitmaps */
            ho_bitmap_free (m_font_nikud);
            m_font_nikud = NULL;

            /* insert font nikud to text out */
            ho_string_cat (s_text_out, font_shin);
            ho_string_cat (s_text_out, font_dagesh);
            ho_string_cat (s_text_out, font_nikud);
          }

          /* free bitmaps */
          ho_bitmap_free (m_font_main_sign);
          ho_bitmap_free (m_text);
          ho_bitmap_free (m_mask);

          /* these are empty pointers now; the failure path relies on it */
          m_text = m_mask = m_font_main_sign = NULL;

          /* update progress (skip the division when the layout reports no
             fonts, to avoid dividing by zero) */
          current_font_number++;
          if (number_of_fonts > 0)
            *progress = 100 * current_font_number / number_of_fonts;
        }

        /* end of word */
        ho_string_cat (s_text_out, " ");
      }

      /* end of line */
      if (html)
        ho_string_cat (s_text_out, "<br/>\n");
      else
        ho_string_cat (s_text_out, "\n");
    }

    /* end of block */
    if (html)
      ho_string_cat (s_text_out, "<br/>\n    </div>\n");
    else
      ho_string_cat (s_text_out, "\n");
  }

  return FALSE;

fail:
  /* release whatever the interrupted iteration had already acquired */
  if (m_font_nikud)
    ho_bitmap_free (m_font_nikud);
  if (m_font_main_sign)
    ho_bitmap_free (m_font_main_sign);
  if (m_mask)
    ho_bitmap_free (m_mask);
  if (m_text)
    ho_bitmap_free (m_text);

  return TRUE;
}