/*
 * Render page PAGE_NUM of DOC at settings.dpi and work out where its
 * non-white content lies.
 *
 * The page is rasterised into a temporary pixbuf, find_white() is run on
 * it, and the resulting row/column flags are turned into a bounding box
 * (from the columns and from find_hunks()' first/last row) plus the list
 * of hunks returned by find_hunks().
 *
 * Returns a PageInfo allocated with g_new(); its `hunks` member is the
 * array handed back by find_hunks().  Nothing here releases either, so
 * presumably the caller is expected to.
 *
 * Failures (page cannot be opened, pixbuf cannot be created, no non-white
 * column found) are reported through g_error().
 */
PageInfo *
analyse_page (PopplerDocument *doc, guint page_num)
{
  PopplerPage *page = poppler_document_get_page (doc, page_num);
  if (page == NULL)
    {
      g_error ("Couldn't open page %d of document", page_num);
    }

  /* The page size comes back in points, 72 to the inch; scaling by
   * settings.dpi / 72.0 converts it to pixels.  The + 0.5 rounds to the
   * nearest pixel before the truncating cast. */
  double pt_width, pt_height;
  poppler_page_get_size (page, &pt_width, &pt_height);
  int width = (int) ((pt_width * settings.dpi / 72.0) + 0.5);
  int height = (int) ((pt_height * settings.dpi / 72.0) + 0.5);

  /* RGB with an alpha channel, 8 bits per sample. */
  GdkPixbuf *image = gdk_pixbuf_new (GDK_COLORSPACE_RGB, TRUE, 8,
                                     width, height);
  if (image == NULL)
    {
      g_error ("Couldn't create an image (size %d x %d) for page %d",
               width, height, page_num);
    }

  poppler_page_render_to_pixbuf (page, 0, 0, width, height,
                                 settings.dpi / 72.0, 0, image);
  g_object_unref (page);

  /* Only the white-row / white-column flags are needed from here on, so
   * the rendered image is dropped as soon as they have been computed. */
  gboolean *white_rows;
  gboolean *white_cols;
  find_white (image, &white_rows, &white_cols);
  g_object_unref (image);

  guint first_row, last_row, n_hunks;
  HunkData *hunks = find_hunks (white_rows, height,
                                &first_row, &last_row, &n_hunks);

  PageInfo *info = g_new (PageInfo, 1);

  /* Horizontal extent comes from the column flags. */
  info->bbox.x = first_zero (white_cols, width);
  info->bbox.width = last_zero (white_cols, width) - info->bbox.x;
  if (info->bbox.width <= 0)
    {
      g_error ("Empty page (%d)? Couldn't find a nonwhite column.",
               page_num);
    }

  /* Vertical extent comes from the rows find_hunks() reported. */
  info->bbox.y = first_row;
  info->bbox.height = last_row - first_row;

  info->num_hunks = n_hunks;
  info->hunks = hunks;

  g_free (white_rows);
  g_free (white_cols);

  return info;
}
/**
 * bitfield_get_first_zero - Locate the first clear bit in a bitfield
 * @bf: Bitfield to search
 * Returns: Index of the first clear bit, or -1 if every bit below
 * bf->max_bits is set
 *
 * Bytes are examined from index 0 upwards; the position inside the first
 * byte that is not 0xff is whatever first_zero() reports for that byte.
 */
int bitfield_get_first_zero(struct bitfield *bf)
{
	const size_t num_bytes = (bf->max_bits + 7) / 8;
	size_t pos = 0;

	/* Skip bytes in which all eight bits are already set. */
	while (pos < num_bytes && bf->bits[pos] == 0xff)
		pos++;

	if (pos == num_bytes)
		return -1;

	/* Turn the byte index into a bit index. */
	pos = pos * 8 + first_zero(bf->bits[pos]);

	/*
	 * The last byte may contain padding bits past max_bits; a clear bit
	 * found there does not count.
	 */
	return pos >= bf->max_bits ? -1 : (int) pos;
}
/*
 * Split an array of per-line "is white" flags into hunks: maximal runs of
 * consecutive non-white lines.
 *
 * lines    - LENGTH flags; a true entry marks a white line.
 * length   - number of entries in LINES.
 * bbstart  - out: index returned by first_zero(), i.e. where scanning
 *            starts (the first non-white line).
 * bbend    - out: index of the last non-white line of the last hunk
 *            (inclusive).  If no non-white line lies within range it is
 *            set equal to *bbstart so callers never read garbage.
 * count    - out: number of entries in the returned array, computed as
 *            num_enclosed_white_regions() + 1.
 *
 * Returns an array of *count HunkData allocated with g_new().  Each hunk's
 * `start` is its first non-white line and `end` is one past its last.
 *
 * A hunk that runs right up to the final line has no white line after it
 * to close it, so it is closed explicitly once the scan finishes.
 */
static HunkData *
find_hunks (const gboolean *lines, guint length,
            guint *bbstart, guint *bbend, guint *count)
{
  g_assert (lines && bbstart && bbend && count);

  *count = num_enclosed_white_regions (lines, length) + 1;

  HunkData *hunks = g_new (HunkData, *count);
  guint hunk_n = 0;
  gboolean last = FALSE;

  /* Begin at the first non-white line. */
  guint i = first_zero (lines, length);
  guint start = i;
  *bbstart = i;

  /* Defined placeholder; overwritten whenever a hunk is closed below. */
  *bbend = i;

  /* Walk down the lines, closing a hunk on every non-white -> white
   * transition and remembering where the next one starts on every
   * white -> non-white transition. */
  for (; i < length; i++)
    {
      if (lines[i])
        {
          if (!last)
            {
              hunks[hunk_n].start = start;
              hunks[hunk_n].end = i;
              hunk_n++;
              *bbend = i - 1;
            }
        }
      else if (last)
        {
          start = i;
        }

      last = lines[i];
    }

  /* The scan ended while still inside a non-white run: that final hunk
   * reaches the end of the array and has not been stored yet.  The
   * hunk_n < *count test keeps the write inside the allocation. */
  if (!last && start < length && hunk_n < *count)
    {
      hunks[hunk_n].start = start;
      hunks[hunk_n].end = length;
      hunk_n++;
      *bbend = length - 1;
    }

  /* Nothing was stored at all, so no non-white line lay within range.
   * Keep the previous behaviour of reporting one hunk that spans the
   * whole array. */
  if ((hunk_n == 0) && (*count > 0))
    {
      g_assert (*count == 1);
      hunks[0].start = 0;
      hunks[0].end = length;
    }

  return hunks;
}