コード例 #1
0
ファイル: ho_bitmap.c プロジェクト: BackupTheBerlios/hocr-svn
/**
 * Build a bitmap in which every object of m has been processed on its own:
 * the object is copied to a private bitmap, passed through ho_bitmap_hlink
 * with a quarter of the object's width and through ho_bitmap_vlink with a
 * quarter of its height, and the result is OR-ed into the output bitmap.
 *
 * Objects whose intermediate bitmaps can not be created are skipped.
 *
 * @param m the input bitmap (not modified)
 * @return a newly allocated ho_bitmap with the origin and font metrics of m,
 *         or NULL when the object map or the output bitmap can not be
 *         allocated
 */
ho_bitmap *
ho_bitmap_filter_fill (const ho_bitmap * m)
{
  ho_objmap *m_obj;
  ho_bitmap *m_out;
  ho_bitmap *m_temp1;
  ho_bitmap *m_temp2;
  int index;
  int width, height;

  /* allocate memory */
  m_obj = ho_objmap_new_from_bitmap (m);
  if (!m_obj)
    return NULL;

  m_out = ho_bitmap_new (m->width, m->height);
  if (!m_out)
  {
    ho_objmap_free (m_obj);
    return NULL;
  }

  /* the output keeps the origin and the font metrics of the input */
  m_out->x = m->x;
  m_out->y = m->y;

  m_out->type = m->type;
  m_out->font_height = m->font_height;
  m_out->font_width = m->font_width;
  m_out->font_spacing = m->font_spacing;
  m_out->line_spacing = m->line_spacing;
  m_out->avg_line_fill = m->avg_line_fill;
  m_out->com_line_fill = m->com_line_fill;
  m_out->nikud = m->nikud;

  /* loop over all the objects and fill them one by one */
  for (index = 0; index < m_obj->obj_list->size; index++)
  {
    /* the link distances are relative to the size of this object */
    width = (((m_obj->obj_list)->objects)[index]).width;
    height = (((m_obj->obj_list)->objects)[index]).height;

    /* copy only the current object to a new bitmap */
    m_temp1 = ho_objmap_to_bitmap_by_index (m_obj, index);
    if (!m_temp1)
      continue;

    /* fill the current object, first along rows ... */
    m_temp2 = ho_bitmap_hlink (m_temp1, width / 4);
    ho_bitmap_free (m_temp1);
    if (!m_temp2)
      continue;

    /* ... then along columns */
    m_temp1 = ho_bitmap_vlink (m_temp2, height / 4);
    ho_bitmap_free (m_temp2);
    if (!m_temp1)
      continue;

    /* add to matrix out */
    ho_bitmap_or (m_out, m_temp1);
    ho_bitmap_free (m_temp1);
  }

  /* the object map is only needed while iterating; release it */
  ho_objmap_free (m_obj);

  return m_out;
}
コード例 #2
0
ファイル: ho_bitmap.c プロジェクト: BackupTheBerlios/hocr-svn
/**
 * Apply ho_bitmap_set_height_from_bottom to every object of m separately
 * and OR the per-object results into one output bitmap.
 *
 * Objects whose intermediate bitmaps can not be created are skipped.
 *
 * @param m the input bitmap (not modified)
 * @param height passed unchanged to ho_bitmap_set_height_from_bottom
 * @param top passed unchanged to ho_bitmap_set_height_from_bottom
 * @param bottom passed unchanged to ho_bitmap_set_height_from_bottom
 * @return a newly allocated ho_bitmap with the origin and font metrics of m,
 *         or NULL when the object map or the output bitmap can not be
 *         allocated
 */
ho_bitmap *
ho_bitmap_filter_set_height_from_bottom (const ho_bitmap * m,
  const int height, const int top, const int bottom)
{
  ho_objmap *m_obj;
  ho_bitmap *m_out;
  ho_bitmap *m_temp1;
  ho_bitmap *m_temp2;
  int index;

  /* allocate memory */
  m_obj = ho_objmap_new_from_bitmap (m);
  if (!m_obj)
    return NULL;

  m_out = ho_bitmap_new (m->width, m->height);
  if (!m_out)
  {
    ho_objmap_free (m_obj);
    return NULL;
  }

  /* the output keeps the origin and the font metrics of the input */
  m_out->x = m->x;
  m_out->y = m->y;

  m_out->type = m->type;
  m_out->font_height = m->font_height;
  m_out->font_width = m->font_width;
  m_out->font_spacing = m->font_spacing;
  m_out->line_spacing = m->line_spacing;
  m_out->avg_line_fill = m->avg_line_fill;
  m_out->com_line_fill = m->com_line_fill;
  m_out->nikud = m->nikud;

  /* loop over all the objects and process them one by one */
  for (index = 0; index < m_obj->obj_list->size; index++)
  {
    /* copy only the current object to a new bitmap */
    m_temp1 = ho_objmap_to_bitmap_by_index (m_obj, index);
    if (!m_temp1)
      continue;

    /* take height pixels from this object */
    m_temp2 = ho_bitmap_set_height_from_bottom (m_temp1, height, top, bottom);
    ho_bitmap_free (m_temp1);
    if (!m_temp2)
      continue;

    /* add to matrix out */
    ho_bitmap_or (m_out, m_temp2);
    ho_bitmap_free (m_temp2);
  }

  /* the object map is only needed while iterating; release it */
  ho_objmap_free (m_obj);

  return m_out;
}
コード例 #3
0
ファイル: ho_bitmap.c プロジェクト: BackupTheBerlios/hocr-svn
/**
 * Combine the data bytes of m_right into m_left, in place, with a bitwise
 * operator selected by op.
 *
 * A window clone of m_right (taken at m_right's own x, y, width and height)
 * is used as the source operand; height * rowstride bytes of m_right are
 * processed.
 *
 * @param m_left the bitmap that is modified
 * @param m_right the bitmap supplying the second operand
 * @param op 0 = and, 1 = or, 2 = xor, 3 = andnot; any other value leaves
 *        m_left untouched
 * @return TRUE when the window clone can not be created, FALSE otherwise
 */
int
ho_bitmap_binop_window (ho_bitmap * m_left, const ho_bitmap * m_right,
  unsigned char op)
{
  ho_bitmap *window;
  int n_bytes;
  int pos;

  window = ho_bitmap_clone_window (m_right, m_right->x, m_right->y,
    m_right->width, m_right->height);
  if (!window)
    return TRUE;

  /* number of data bytes to walk over, taken from the right operand */
  n_bytes = m_right->height * m_right->rowstride;

  if (op == 0)
  {
    /* and */
    for (pos = 0; pos < n_bytes; pos++)
      m_left->data[pos] &= window->data[pos];
  }
  else if (op == 1)
  {
    /* or */
    for (pos = 0; pos < n_bytes; pos++)
      m_left->data[pos] |= window->data[pos];
  }
  else if (op == 2)
  {
    /* xor */
    for (pos = 0; pos < n_bytes; pos++)
      m_left->data[pos] ^= window->data[pos];
  }
  else if (op == 3)
  {
    /* andnot */
    for (pos = 0; pos < n_bytes; pos++)
      m_left->data[pos] &= ~window->data[pos];
  }

  ho_bitmap_free (window);

  return FALSE;
}
コード例 #4
0
ファイル: ho_bitmap.c プロジェクト: BackupTheBerlios/hocr-svn
/**
 * Keep only the pixels of m that disappear after n erosion passes.
 *
 * A clone of m is eroded n times with ho_bitmap_erosion; the eroded bitmap
 * is then removed from a second clone of m with ho_bitmap_andnot.
 *
 * @param m the input bitmap (not modified)
 * @param n the number of erosion passes
 * @return a newly allocated ho_bitmap, or NULL when a clone or an erosion
 *         step fails
 */
ho_bitmap *
ho_bitmap_edge (const ho_bitmap * m, const int n)
{
  ho_bitmap *m_out;
  ho_bitmap *m_temp1;
  ho_bitmap *m_temp2;
  int i;

  m_temp1 = ho_bitmap_clone (m);
  if (!m_temp1)
    return NULL;

  /* erode n times, always releasing the previous generation */
  for (i = 0; i < n; i++)
  {
    m_temp2 = ho_bitmap_erosion (m_temp1);
    ho_bitmap_free (m_temp1);
    if (!m_temp2)
      return NULL;
    m_temp1 = m_temp2;
  }

  m_out = ho_bitmap_clone (m);
  if (!m_out)
  {
    /* do not leak the eroded bitmap when the second clone fails */
    ho_bitmap_free (m_temp1);
    return NULL;
  }
  m_out->x = m->x;
  m_out->y = m->y;

  m_out->type = m->type;
  m_out->font_height = m->font_height;
  m_out->font_width = m->font_width;
  m_out->font_spacing = m->font_spacing;
  m_out->line_spacing = m->line_spacing;
  m_out->avg_line_fill = m->avg_line_fill;
  m_out->com_line_fill = m->com_line_fill;
  m_out->nikud = m->nikud;

  /* original minus eroded interior */
  ho_bitmap_andnot (m_out, m_temp1);
  ho_bitmap_free (m_temp1);

  return m_out;
}
コード例 #5
0
ファイル: ho_bitmap.c プロジェクト: BackupTheBerlios/hocr-svn
/**
 * Apply ho_bitmap_dilation followed by ho_bitmap_erosion to a bitmap.
 *
 * @param m the input bitmap (not modified); NULL is accepted
 * @return the bitmap returned by ho_bitmap_erosion, or NULL when m is NULL
 *         or the dilation step fails
 */
ho_bitmap *
ho_bitmap_closing (const ho_bitmap * m)
{
  ho_bitmap *m_temp;
  ho_bitmap *m_out;

  if (!m)
    return NULL;

  m_temp = ho_bitmap_dilation (m);
  if (!m_temp)
    return NULL;                /* nothing to erode */

  m_out = ho_bitmap_erosion (m_temp);
  ho_bitmap_free (m_temp);

  return m_out;
}
コード例 #6
0
ファイル: hocr.c プロジェクト: blackpearlrgh/hebocr
/**
 run the OCR stages on a pixbuf: image processing, layout analysis and
 font recognition

 @param pix_in the input ho_pixbuf
 @param s_text_out the text buffer handed to hocr_font_recognition
 @param options image processing options
 @param layout_options layout analysis options; its html field selects the output format of the recognition stage
 @param font_options font recognition options
 @param progress a progress indicator, passed on to every stage
 @return TRUE when pix_in or layout_options is missing or a stage fails, otherwise the value returned by hocr_font_recognition
 */
int hocr_do_ocr_fine (const ho_pixbuf * pix_in, ho_string * s_text_out, HEBOCR_IMAGE_OPTIONS *options, HEBOCR_LAYOUT_OPTIONS* layout_options, HEBOCR_FONT_OPTIONS *font_options, int *progress)
{
  ho_bitmap *m_in = NULL;
  ho_layout *l_page = NULL;

  /* layout_options is dereferenced below, so reject it up front instead of
   * crashing after the expensive stages have already run */
  if (!pix_in || !layout_options)
    return TRUE;

  m_in = hocr_image_processing( pix_in, options,progress);
  if (!m_in)
    return TRUE;

  l_page = hocr_layout_analysis (m_in, layout_options, progress);

  if (!l_page)
  {
    ho_bitmap_free (m_in);
    return TRUE;
  }

  /* NOTE(review): neither m_in nor l_page is released after recognition.
   * Ownership is not visible from here - confirm whether the layout keeps
   * a reference to m_in and free both if this function owns them. */
  return hocr_font_recognition( l_page, s_text_out, font_options, layout_options->html, progress );
}
コード例 #7
0
ファイル: ho_bitmap.c プロジェクト: BackupTheBerlios/hocr-svn
/**
 * Horizontally link only the objects selected by ho_bitmap_filter_by_size
 * and then OR the complete input bitmap on top of the result.
 *
 * This function uses objects through the "_by_size" function, which is why
 * it is a filter and not a regular bitmap function.
 *
 * @param m the input bitmap
 * @param size passed unchanged to ho_bitmap_hlink
 * @param max_height second size limit passed to ho_bitmap_filter_by_size
 *        (presumably the maximal object height - confirm against that
 *        function)
 * @return a newly allocated ho_bitmap, or NULL when the size filter or the
 *         link step fails
 */
ho_bitmap *
ho_bitmap_filter_hlink (ho_bitmap * m, int size, int max_height)
{
  ho_bitmap *m_out;
  ho_bitmap *m_temp;

  /* get only the thin objects */
  m_temp = ho_bitmap_filter_by_size (m, 5, max_height, 5, m->width / 2);
  if (!m_temp)
    return NULL;

  /* hlink the thin objects; the filtered copy is not needed afterwards */
  m_out = ho_bitmap_hlink (m_temp, size);
  ho_bitmap_free (m_temp);
  if (!m_out)
    return NULL;

  /* add the rest of the large objects */
  ho_bitmap_or (m_out, m);

  return m_out;
}
コード例 #8
0
ファイル: hocr.c プロジェクト: blackpearlrgh/hebocr
/**
 convert a pixbuf to a size-filtered and optionally rotated bitmap

 @param pix_in the input ho_pixbuf
 @param image_options image process options (scale, auto_scale, rotation_angle and auto_rotate are read here)
 @param progress a progress indicator 0..100
 @return newly allocated ho_bitmap, or NULL when any step fails
 */
ho_bitmap *hocr_image_processing (const ho_pixbuf * pix_in, HEBOCR_IMAGE_OPTIONS *image_options, int *progress)
{

  ho_bitmap *bitmap_out = NULL;
  ho_bitmap *bitmap_temp = NULL;
  double angle = 0.0;
  int scale_by = 0;
  unsigned char size = 0;

  /* init progress */
  *progress = 0;

  /* get the raw b/w bitmap from the pixbuf */
  bitmap_temp = ho_pixbuf_to_bitmap_wrapper(pix_in, image_options, size);
  if (!bitmap_temp)
    return NULL;

  /* update progress */
  *progress = 25;

  /* do we want to auto scale ? */
  if (!image_options->scale && image_options->auto_scale)
  {
    /* get fonts size for autoscale */
    if (ho_dimentions_font_width_height_nikud (bitmap_temp, 6, 200, 6, 200))
    {
      /* do not leak the raw bitmap when the font metrics can not be read */
      ho_bitmap_free (bitmap_temp);
      return NULL;
    }

    /* if fonts are too small, re-scale image */
    if (bitmap_temp->font_height < 15)
      scale_by = 3;
    else if (bitmap_temp->font_height < 30)
      scale_by = 2;
    else
      scale_by = 1;

    if (scale_by > 1)
    {
      /* re-create bitmap */
      /* NOTE(review): scale_by is never handed to the wrapper and size is
       * still 0, so this call repeats the first one with identical
       * arguments - confirm how the scale factor is meant to be applied */
      ho_bitmap_free (bitmap_temp);
      bitmap_temp = ho_pixbuf_to_bitmap_wrapper (pix_in, image_options, size);
      if (!bitmap_temp)
        return NULL;
    }
  }

  /* update progress */
  *progress = 50;

  /* remove very small and very large things */
  bitmap_out =
    ho_bitmap_filter_by_size (bitmap_temp, 3, 3 * bitmap_temp->height / 4, 3,
    3 * bitmap_temp->width / 4);
  ho_bitmap_free (bitmap_temp);
  if (!bitmap_out)
    return NULL;

  /* update progress */
  *progress = 75;

  /* rotate image: an explicit angle wins over auto rotation */
  if (image_options->rotation_angle != 0)
  {
    bitmap_temp = ho_bitmap_rotate(bitmap_out, image_options->rotation_angle);
    ho_bitmap_free (bitmap_out);
    if (!bitmap_temp)
      return NULL;

    bitmap_out = bitmap_temp;
  }
  else if (image_options->auto_rotate)
  {
    /* get fonts size for auto angle */
    if (ho_dimentions_font_width_height_nikud (bitmap_out, 6, 200, 6, 200))
    {
      /* do not leak the filtered bitmap on failure */
      ho_bitmap_free (bitmap_out);
      return NULL;
    }

    angle = ho_dimentions_get_lines_angle (bitmap_out);
    if (angle)
    {
      bitmap_temp = ho_bitmap_rotate (bitmap_out, angle);
      ho_bitmap_free (bitmap_out);
      if (!bitmap_temp)
        return NULL;

      bitmap_out = bitmap_temp;
    }
  }

  return bitmap_out;
}
コード例 #9
0
ファイル: hocr.c プロジェクト: blackpearlrgh/hebocr
/**
 fill a text buffer with fonts recognized from a page layout

 @param l_page the page layout to recognize
 @param s_text_out the text buffer to fill
 @param font_options font recognition options (font_code, do_linguistics and nikud are read here)
 @param html non-zero to wrap paragraphs and lines in html markup
 @param progress a progress indicator 0..100
 @return FALSE on success, TRUE when l_page or s_text_out is missing or a font bitmap can not be obtained
 */
int hocr_font_recognition( const ho_layout* l_page, ho_string* s_text_out, HEBOCR_FONT_OPTIONS *font_options, int html, int* progress )
{
  int block_index;
  int line_index;
  int word_index;
  int font_index;
  int current_font_number = 0;
  int number_of_fonts;
  ho_bitmap *m_text = NULL;
  ho_bitmap *m_mask = NULL;
  ho_bitmap *m_font_main_sign = NULL;
  ho_bitmap *m_font_nikud = NULL;

  char text_out[200];
  const char *font;
  const char *font_nikud;
  const char *font_dagesh;
  const char *font_shin;

  /* init progress */
  *progress = 0;

  /* did we get a text buffer and a layout ? */
  if (!s_text_out || !l_page)
    return TRUE;

  /* only read the layout after it is known to be non-NULL */
  number_of_fonts = l_page->number_of_fonts;

  /* loop over the layout */
  for (block_index = 0; block_index < l_page->n_blocks; block_index++)
  {
    /* start of paragraph */
    if (html)
    {
      /* bounded formatting: the output is truncated rather than written
       * past the end of text_out */
      snprintf (text_out, sizeof text_out,
        "    <div class=\"ocr_par\" id=\"par_%d\" title=\"bbox %d %d %d %d\">\n",
        block_index + 1, l_page->m_blocks_text[block_index]->x,
        l_page->m_blocks_text[block_index]->y,
        l_page->m_blocks_text[block_index]->x +
        l_page->m_blocks_text[block_index]->width,
        l_page->m_blocks_text[block_index]->y +
        l_page->m_blocks_text[block_index]->height);
      ho_string_cat (s_text_out, text_out);
    }

    for (line_index = 0; line_index < l_page->n_lines[block_index];
      line_index++)
    {
      /* start of line */

      /* loop on all the words in this line */
      for (word_index = 0;
        word_index < l_page->n_words[block_index][line_index]; word_index++)
      {
        int word_length = l_page->n_fonts[block_index][line_index][word_index];

        unsigned char word_end = FALSE;

        unsigned char word_start = TRUE;

        int last_char_i = 0;

        int char_i = 0;

        /* start of word */
        for (font_index = 0; font_index < word_length; font_index++)
        {
          word_end = (font_index == (word_length - 1));
          word_start = (font_index == 0);

          /* get font images */

          /* get the font */
          m_text =
            ho_layout_get_font_text (l_page, block_index,
            line_index, word_index, font_index);
          if (!m_text)
            return TRUE;

          /* get font line mask */
          m_mask =
            ho_layout_get_font_line_mask (l_page, block_index,
            line_index, word_index, font_index);
          if (!m_mask)
          {
            ho_bitmap_free (m_text);
            return TRUE;
          }

          /* get font main sign */
          m_font_main_sign = ho_font_main_sign (m_text, m_mask);
          if (!m_font_main_sign)
          {
            ho_bitmap_free (m_text);
            ho_bitmap_free (m_mask);
            return TRUE;
          }

          /* recognize font from images; char_i carries state from the
           * previous font of this word to the next call */
          last_char_i = char_i;
          font =
            ho_recognize_font (m_font_main_sign, m_mask,
            font_options->font_code, font_options->do_linguistics, word_end,
            word_start, &char_i, last_char_i);

          /* insert font to text out */
          ho_string_cat (s_text_out, font);

          /* get font nikud */
          if (font_options->nikud)
          {
            /* nikud image = the whole font image minus its main sign */
            m_font_nikud = ho_bitmap_clone (m_text);
            if (!m_font_nikud)
            {
              ho_bitmap_free (m_font_main_sign);
              ho_bitmap_free (m_text);
              ho_bitmap_free (m_mask);
              return TRUE;
            }
            ho_bitmap_andnot (m_font_nikud, m_font_main_sign);

            /* recognize font from images */
            font_nikud = ho_recognize_nikud (m_font_nikud, m_mask,
              font_options->font_code, &font_dagesh, &font_shin);

            /* free bitmaps */
            ho_bitmap_free (m_font_nikud);
            m_font_nikud = NULL;

            /* insert font nikud to text out */
            ho_string_cat (s_text_out, font_shin);
            ho_string_cat (s_text_out, font_dagesh);
            ho_string_cat (s_text_out, font_nikud);
          }

          /* free bitmaps */
          ho_bitmap_free (m_font_main_sign);
          ho_bitmap_free (m_text);
          ho_bitmap_free (m_mask);

          /* these are empty pointers now */
          m_text = m_mask = m_font_main_sign = NULL;

          /* update progress; skip it when the layout reports no fonts so
           * that we never divide by zero */
          current_font_number++;
          if (number_of_fonts > 0)
            *progress = 100 * current_font_number / number_of_fonts;
        }

        /* end of word */
        ho_string_cat (s_text_out, " ");
      }

      /* end of line */
      if (html)
        ho_string_cat (s_text_out, "<br/>\n");
      else
        ho_string_cat (s_text_out, "\n");
    }

    /* end of block */
    if (html)
      ho_string_cat (s_text_out, "<br/>\n    </div>\n");
    else
      ho_string_cat (s_text_out, "\n");
  }

  return FALSE;
}
コード例 #10
0
ファイル: ho_bitmap.c プロジェクト: BackupTheBerlios/hocr-svn
/**
 * Extend the objects of a bitmap sideways.
 *
 * A vertical line ("stopper") is drawn ext_width pixels to the left and to
 * the right of every object on a working copy of m, the copy is passed
 * through ho_bitmap_hlink with 7 * ext_width / 4, and the stoppers are then
 * deleted from the result. Stopper columns are clipped so that they stay
 * inside the columns 0 .. m->width - 1.
 *
 * @param m the input bitmap (not modified)
 * @param ext_width the distance between an object and its stoppers
 * @return a newly allocated ho_bitmap with the origin and font metrics of m,
 *         or NULL when an allocation or the link step fails
 */
ho_bitmap *
ho_bitmap_filter_obj_extend_lateraly (const ho_bitmap * m, const int ext_width)
{
  ho_objmap *m_obj;

  ho_bitmap *m_temp;
  ho_bitmap *m_out;

  int x, y;
  int index;
  int width, height;

  m_temp = ho_bitmap_clone (m);
  if (!m_temp)
    return NULL;

  /* loop over all objects and extend them laterally */
  /* allocate memory */
  m_obj = ho_objmap_new_from_bitmap (m_temp);
  if (!m_obj)
  {
    ho_bitmap_free (m_temp);
    return NULL;
  }

  /* draw stoppers */
  for (index = 0; index < m_obj->obj_list->size; index++)
  {
    x = (((m_obj->obj_list)->objects)[index]).x;
    y = (((m_obj->obj_list)->objects)[index]).y;
    width = (((m_obj->obj_list)->objects)[index]).width;
    height = (((m_obj->obj_list)->objects)[index]).height;

    /* clip so that the left stopper lands on column 0 at the earliest ... */
    if (x - ext_width < 0)
      x = ext_width;
    /* ... and the right stopper on the last column at the latest */
    if (x + width + ext_width >= m->width)
      width = m->width - x - ext_width - 1;

    ho_bitmap_draw_vline (m_temp, x - ext_width, y, height);
    ho_bitmap_draw_vline (m_temp, x + width + ext_width, y, height);
  }

  /* extend */
  m_out = ho_bitmap_hlink (m_temp, 7 * ext_width / 4);

  ho_bitmap_free (m_temp);
  if (!m_out)
  {
    /* do not leak the object map when the link step fails */
    ho_objmap_free (m_obj);
    return NULL;
  }

  /* delete stoppers; same clipping as when they were drawn */
  for (index = 0; index < m_obj->obj_list->size; index++)
  {
    x = (((m_obj->obj_list)->objects)[index]).x;
    y = (((m_obj->obj_list)->objects)[index]).y;
    width = (((m_obj->obj_list)->objects)[index]).width;
    height = (((m_obj->obj_list)->objects)[index]).height;

    if (x - ext_width < 0)
      x = ext_width;
    if (x + width + ext_width >= m->width)
      width = m->width - x - ext_width - 1;
    ho_bitmap_delete_vline (m_out, x - ext_width, y, height);
    ho_bitmap_delete_vline (m_out, x + width + ext_width, y, height);
  }

  /* set origin */
  m_out->x = m->x;
  m_out->y = m->y;

  m_out->type = m->type;
  m_out->font_height = m->font_height;
  m_out->font_width = m->font_width;
  m_out->font_spacing = m->font_spacing;
  m_out->line_spacing = m->line_spacing;
  m_out->avg_line_fill = m->avg_line_fill;
  m_out->com_line_fill = m->com_line_fill;
  m_out->nikud = m->nikud;

  ho_objmap_free (m_obj);

  return m_out;
}
コード例 #11
0
ファイル: ho_bitmap.c プロジェクト: BackupTheBerlios/hocr-svn
/**
 * Build a bitmap where pixels inside big objects are thinned and pixels
 * inside small objects are thickened.
 *
 * Objects are measured on a dilated copy of m. For every interior pixel
 * (the outermost rows and columns are never set in the output):
 *  - when the pixel's object is wider than m->width / 4 or taller than
 *    m->height / 4, a set pixel is kept only if (8 - sum) < erosion_n,
 *    where sum adds ho_bitmap_get over the 8 neighbours;
 *  - otherwise a set pixel is always kept and a clear pixel is set when
 *    sum >= dilation_n.
 * Pixels that belong to no object are handled by the second rule.
 *
 * @param m the input bitmap (not modified)
 * @param erosion_n threshold used inside big objects
 * @param dilation_n threshold used everywhere else
 * @return a newly allocated ho_bitmap with the origin and font metrics of m,
 *         or NULL when an allocation fails
 */
ho_bitmap *
ho_bitmap_filter_remove_dots (const ho_bitmap * m,
  const unsigned char erosion_n, const unsigned char dilation_n)
{
  int x, y;
  unsigned char sum;
  ho_bitmap *m_temp;
  ho_bitmap *m_out;
  ho_objmap *m_obj;
  int index;
  int width, height;

  /* allocate memory */
  m_out = ho_bitmap_new (m->width, m->height);
  if (!m_out)
    return NULL;
  m_out->x = m->x;
  m_out->y = m->y;

  m_out->type = m->type;
  m_out->font_height = m->font_height;
  m_out->font_width = m->font_width;
  m_out->font_spacing = m->font_spacing;
  m_out->line_spacing = m->line_spacing;
  m_out->avg_line_fill = m->avg_line_fill;
  m_out->com_line_fill = m->com_line_fill;
  m_out->nikud = m->nikud;

  /* connect all the small dots */
  m_temp = ho_bitmap_dilation (m);
  if (!m_temp)
  {
    ho_bitmap_free (m_out);
    return NULL;
  }

  /* check the size of objects */
  m_obj = ho_objmap_new_from_bitmap (m_temp);
  ho_bitmap_free (m_temp);
  if (!m_obj)
  {
    ho_bitmap_free (m_out);
    return NULL;
  }

  for (x = 1; x < (m->width - 1); x++)
    for (y = 1; y < (m->height - 1); y++)
    {

      /* check the size of this pixel's object; index 0 means the pixel
       * belongs to no object, other indices are 1-based */
      index = ho_objmap_get (m_obj, x, y);
      if (index)
      {
        width = (((m_obj->obj_list)->objects)[index - 1]).width;
        height = (((m_obj->obj_list)->objects)[index - 1]).height;
      }
      else
      {
        width = 0;
        height = 0;
      }

      /* in a big object do erosion */
      if (width > m->width / 4 || height > m->height / 4)
      {
        if (ho_bitmap_get (m, x, y))  /* black pixel */
        {
          sum = ho_bitmap_get (m, x - 1, y - 1) +
            ho_bitmap_get (m, x - 1, y) +
            ho_bitmap_get (m, x - 1, y + 1) +
            ho_bitmap_get (m, x, y - 1) +
            ho_bitmap_get (m, x, y + 1) +
            ho_bitmap_get (m, x + 1, y - 1) +
            ho_bitmap_get (m, x + 1, y) + ho_bitmap_get (m, x + 1, y + 1);
          /* keep the pixel only when fewer than erosion_n neighbours are
           * white */
          if ((8 - sum) < erosion_n)
            ho_bitmap_set (m_out, x, y);
        }
      }
      else                      /* if in a small object do dilation */
      {
        if (!ho_bitmap_get (m, x, y)) /* white pixel */
        {
          sum = ho_bitmap_get (m, x - 1, y - 1) +
            ho_bitmap_get (m, x - 1, y) +
            ho_bitmap_get (m, x - 1, y + 1) +
            ho_bitmap_get (m, x, y - 1) +
            ho_bitmap_get (m, x, y + 1) +
            ho_bitmap_get (m, x + 1, y - 1) +
            ho_bitmap_get (m, x + 1, y) + ho_bitmap_get (m, x + 1, y + 1);
          /* n number of black neighbors or more */
          if (sum >= dilation_n)
            ho_bitmap_set (m_out, x, y);
        }
        else
          ho_bitmap_set (m_out, x, y);
      }
    }

  ho_objmap_free (m_obj);

  return m_out;
}