ho_bitmap * ho_bitmap_edge (const ho_bitmap * m, const int n) { ho_bitmap *m_out; ho_bitmap *m_temp1; ho_bitmap *m_temp2; int i; m_temp1 = ho_bitmap_clone (m); if (!m_temp1) return NULL; for (i = 0; i < n; i++) { m_temp2 = ho_bitmap_erosion (m_temp1); ho_bitmap_free (m_temp1); if (!m_temp2) return NULL; m_temp1 = m_temp2; } m_out = ho_bitmap_clone (m); if (!m_out) return NULL; m_out->x = m->x; m_out->y = m->y; m_out->type = m->type; m_out->font_height = m->font_height; m_out->font_width = m->font_width; m_out->font_spacing = m->font_spacing; m_out->line_spacing = m->line_spacing; m_out->avg_line_fill = m->avg_line_fill; m_out->com_line_fill = m->com_line_fill; m_out->nikud = m->nikud; ho_bitmap_andnot (m_out, m_temp1); ho_bitmap_free (m_temp1); return m_out; }
/**
 fill a text buffer with fonts recognized from a page layout

 @param l_page the page layout to recognize
 @param s_text_out the text buffer to fill
 @param font_options recognition options; font_code, do_linguistics and
 nikud are the fields read here
 @param html non zero for html output, zero for plain text output
 @param progress a progress indicator 0..100, may be NULL when the caller
 does not want progress reports
 @return FALSE on success, TRUE when an argument is missing or when a font
 bitmap could not be created
 */
int
hocr_font_recognition (const ho_layout * l_page, ho_string * s_text_out,
                       HEBOCR_FONT_OPTIONS * font_options, int html,
                       int *progress)
{
  int block_index;
  int line_index;
  int word_index;
  int font_index;
  int current_font_number = 0;
  int number_of_fonts;
  ho_bitmap *m_text = NULL;
  ho_bitmap *m_mask = NULL;
  ho_bitmap *m_font_main_sign = NULL;
  ho_bitmap *m_font_nikud = NULL;
  char text_out[200];
  const char *font;
  const char *font_nikud;
  const char *font_dagesh;
  const char *font_shin;

  /* init progress */
  if (progress)
    *progress = 0;

  /* did we get a text buffer, a layout and the font options ? */
  if (!s_text_out || !l_page || !font_options)
    return TRUE;

  /* read the layout only after it is known to be a valid pointer */
  number_of_fonts = l_page->number_of_fonts;

  /* loop over the layout */
  for (block_index = 0; block_index < l_page->n_blocks; block_index++)
    {
      /* start of paragraph */
      if (html)
        {
          /* snprintf bounds the write to the size of text_out */
          snprintf (text_out, sizeof text_out,
                    " <div class=\"ocr_par\" id=\"par_%d\" title=\"bbox %d %d %d %d\">\n",
                    block_index + 1,
                    l_page->m_blocks_text[block_index]->x,
                    l_page->m_blocks_text[block_index]->y,
                    l_page->m_blocks_text[block_index]->x +
                    l_page->m_blocks_text[block_index]->width,
                    l_page->m_blocks_text[block_index]->y +
                    l_page->m_blocks_text[block_index]->height);
          ho_string_cat (s_text_out, text_out);
        }

      for (line_index = 0; line_index < l_page->n_lines[block_index];
           line_index++)
        {
          /* loop on all the words in this line */
          for (word_index = 0;
               word_index < l_page->n_words[block_index][line_index];
               word_index++)
            {
              int word_length =
                l_page->n_fonts[block_index][line_index][word_index];
              unsigned char word_end = FALSE;
              unsigned char word_start = TRUE;
              int last_char_i = 0;
              int char_i = 0;

              /* start of word */
              for (font_index = 0; font_index < word_length; font_index++)
                {
                  word_end = (font_index == (word_length - 1));
                  word_start = (font_index == 0);

                  /* get the font */
                  m_text = ho_layout_get_font_text (l_page, block_index,
                                                    line_index, word_index,
                                                    font_index);
                  if (!m_text)
                    goto fail;

                  /* get font line mask */
                  m_mask = ho_layout_get_font_line_mask (l_page, block_index,
                                                         line_index,
                                                         word_index,
                                                         font_index);
                  if (!m_mask)
                    goto fail;

                  /* get font main sign */
                  m_font_main_sign = ho_font_main_sign (m_text, m_mask);
                  if (!m_font_main_sign)
                    goto fail;

                  /* recognize font from images, the index of the previous
                     font is handed in together with a slot for this one */
                  last_char_i = char_i;
                  font = ho_recognize_font (m_font_main_sign, m_mask,
                                            font_options->font_code,
                                            font_options->do_linguistics,
                                            word_end, word_start,
                                            &char_i, last_char_i);

                  /* insert font to text out */
                  ho_string_cat (s_text_out, font);

                  /* get font nikud */
                  if (font_options->nikud)
                    {
                      m_font_nikud = ho_bitmap_clone (m_text);
                      if (!m_font_nikud)
                        goto fail;

                      /* take the main sign out of the clone, what is left
                         is handed to the nikud recognizer */
                      ho_bitmap_andnot (m_font_nikud, m_font_main_sign);

                      /* recognize nikud from images */
                      font_nikud = ho_recognize_nikud (m_font_nikud, m_mask,
                                                       font_options->font_code,
                                                       &font_dagesh,
                                                       &font_shin);

                      /* free bitmaps */
                      ho_bitmap_free (m_font_nikud);
                      m_font_nikud = NULL;

                      /* insert font nikud to text out */
                      ho_string_cat (s_text_out, font_shin);
                      ho_string_cat (s_text_out, font_dagesh);
                      ho_string_cat (s_text_out, font_nikud);
                    }

                  /* free bitmaps */
                  ho_bitmap_free (m_font_main_sign);
                  ho_bitmap_free (m_text);
                  ho_bitmap_free (m_mask);

                  /* these are empty pointers now */
                  m_text = m_mask = m_font_main_sign = NULL;

                  /* update progress, a layout that reports no fonts must
                     not cause a division by zero */
                  current_font_number++;
                  if (progress && number_of_fonts > 0)
                    *progress = 100 * current_font_number / number_of_fonts;
                }

              /* end of word */
              ho_string_cat (s_text_out, " ");
            }

          /* end of line */
          if (html)
            ho_string_cat (s_text_out, "<br/>\n");
          else
            ho_string_cat (s_text_out, "\n");
        }

      /* end of block */
      if (html)
        ho_string_cat (s_text_out, "<br/>\n </div>\n");
      else
        ho_string_cat (s_text_out, "\n");
    }

  return FALSE;

fail:
  /* release whatever was acquired for the font that failed */
  if (m_font_main_sign)
    ho_bitmap_free (m_font_main_sign);
  if (m_mask)
    ho_bitmap_free (m_mask);
  if (m_text)
    ho_bitmap_free (m_text);

  return TRUE;
}
ho_bitmap * ho_bitmap_filter_obj_extend_lateraly (const ho_bitmap * m, const int ext_width) { ho_objmap *m_obj; ho_bitmap *m_temp; ho_bitmap *m_out; int x, y; int index; int width, height; m_temp = ho_bitmap_clone (m); if (!m_temp) return NULL; /* loop over all objects and extend them lateraly */ /* allocate memory */ m_obj = ho_objmap_new_from_bitmap (m_temp); if (!m_obj) { ho_bitmap_free (m_temp); return NULL; } /* draw stopers */ for (index = 0; index < m_obj->obj_list->size; index++) { x = (((m_obj->obj_list)->objects)[index]).x; y = (((m_obj->obj_list)->objects)[index]).y; width = (((m_obj->obj_list)->objects)[index]).width; height = (((m_obj->obj_list)->objects)[index]).height; if (x - ext_width < 0) x = ext_width; if (x + width + ext_width >= m->width) width = m->width - x - ext_width - 1; ho_bitmap_draw_vline (m_temp, x - ext_width, y, height); ho_bitmap_draw_vline (m_temp, x + width + ext_width, y, height); } /* extend */ m_out = ho_bitmap_hlink (m_temp, 7 * ext_width / 4); ho_bitmap_free (m_temp); if (!m_out) return NULL; /* delete stopers */ for (index = 0; index < m_obj->obj_list->size; index++) { x = (((m_obj->obj_list)->objects)[index]).x; y = (((m_obj->obj_list)->objects)[index]).y; width = (((m_obj->obj_list)->objects)[index]).width; height = (((m_obj->obj_list)->objects)[index]).height; if (x - ext_width < 0) x = ext_width; if (x + width + ext_width >= m->width) width = m->width - x - ext_width - 1; ho_bitmap_delete_vline (m_out, x - ext_width, y, height); ho_bitmap_delete_vline (m_out, x + width + ext_width, y, height); } /* set origin */ m_out->x = m->x; m_out->y = m->y; m_out->type = m->type; m_out->font_height = m->font_height; m_out->font_width = m->font_width; m_out->font_spacing = m->font_spacing; m_out->line_spacing = m->line_spacing; m_out->avg_line_fill = m->avg_line_fill; m_out->com_line_fill = m->com_line_fill; m_out->nikud = m->nikud; ho_objmap_free (m_obj); return m_out; }