/*
 * Report whether two blits of `image' overlap or are adjacent.
 *
 * Each blit is described by its position (blit x/y) and by the size of its
 * bitmap.  The horizontal extents (x, width) and the vertical extents
 * (y, height) are each checked with segments_intersect_or_touch().
 *
 * image  - image that owns both blits
 * b1, b2 - indices of the blits to compare
 *
 * Returns 1 when both the horizontal and the vertical check succeed,
 * 0 otherwise.
 */
static int blits_intersect_or_touch(mdjvu_image_t image, int32 b1, int32 b2)
{
    /* Geometry of the first blit. */
    mdjvu_bitmap_t shape_a = mdjvu_image_get_blit_bitmap(image, b1);
    int32 left_a = mdjvu_image_get_blit_x(image, b1);
    int32 top_a = mdjvu_image_get_blit_y(image, b1);
    int32 width_a = mdjvu_bitmap_get_width(shape_a);
    int32 height_a = mdjvu_bitmap_get_height(shape_a);

    /* Geometry of the second blit. */
    mdjvu_bitmap_t shape_b = mdjvu_image_get_blit_bitmap(image, b2);
    int32 left_b = mdjvu_image_get_blit_x(image, b2);
    int32 top_b = mdjvu_image_get_blit_y(image, b2);
    int32 width_b = mdjvu_bitmap_get_width(shape_b);
    int32 height_b = mdjvu_bitmap_get_height(shape_b);

    /* The vertical check is only needed when the horizontal one passes. */
    if (!segments_intersect_or_touch(left_a, width_a, left_b, width_b))
        return 0;

    return segments_intersect_or_touch(top_a, height_a, top_b, height_b) != 0;
}
/*
 * Build a new image from `bitmap' by handing it to add_to_image().
 *
 * bitmap - source bitmap; its width and height become the image size
 * dpi    - stored as the image resolution and forwarded to add_to_image()
 * opt    - splitting options, forwarded to add_to_image() untouched
 *
 * The returned image has its "suspiciously big" flags enabled.  It is created
 * here with mdjvu_image_create() and handed to the caller.
 */
mdjvu_image_t mdjvu_split(mdjvu_bitmap_t bitmap, int32 dpi, mdjvu_split_options_t opt)
{
    int32 page_width = mdjvu_bitmap_get_width(bitmap);
    int32 page_height = mdjvu_bitmap_get_height(bitmap);
    mdjvu_image_t page = mdjvu_image_create(page_width, page_height);

    mdjvu_image_enable_suspiciously_big_flags(page);
    mdjvu_image_set_resolution(page, dpi);

    /* Top-level pass: zero blit offset, and the `big' argument is 0. */
    add_to_image(page, bitmap, dpi, opt, 0, 0, 0);

    return page;
}
/*
 * Create a pattern from a bitmap.
 *
 * The bitmap is unpacked into a temporary 2D byte array, the pattern is built
 * from that array with mdjvu_pattern_create_from_array(), and the temporary
 * array is destroyed before returning.
 *
 * bitmap - source bitmap (only read through the bitmap accessors here)
 *
 * Returns the pattern produced by mdjvu_pattern_create_from_array().
 */
mdjvu_pattern_t mdjvu_pattern_create(mdjvu_bitmap_t bitmap)
{
    int32 width = mdjvu_bitmap_get_width(bitmap);
    int32 height = mdjvu_bitmap_get_height(bitmap);
    byte **unpacked = mdjvu_create_2d_array(width, height);
    mdjvu_pattern_t made;

    mdjvu_bitmap_unpack_all(bitmap, unpacked);
    made = mdjvu_pattern_create_from_array(unpacked, width, height);

    /* The scratch array is released here, right after the pattern is built. */
    mdjvu_destroy_2d_array(unpacked);

    return made;
}
/*
 * Compute the erosion-candidate mask of a bitmap.
 *
 * A new bitmap of the same size as `bmp' is created.  For every row that has
 * both a row above and a row below it, the row and its two neighbours are
 * unpacked (as 0/1 bytes) and passed to get_erosion_candidates_in_a_row();
 * the row it produces is packed into the result.
 *
 * The first and the last row of the result are never written here, so they
 * keep whatever mdjvu_bitmap_create() put there.  For bitmaps with fewer than
 * three rows the freshly created bitmap is returned as is.
 *
 * bmp - source bitmap (not modified)
 *
 * Returns the newly created mask bitmap.
 */
MDJVU_IMPLEMENT mdjvu_bitmap_t mdjvu_get_erosion_mask(mdjvu_bitmap_t bmp)
{
    int32 w = mdjvu_bitmap_get_width(bmp);
    int32 h = mdjvu_bitmap_get_height(bmp);
    mdjvu_bitmap_t result = mdjvu_bitmap_create(w, h);
    unsigned char *above, *current, *below, *out;
    int32 next;

    if (h < 3)
        return result;

    above = (unsigned char *) malloc(w);
    current = (unsigned char *) malloc(w);
    below = (unsigned char *) malloc(w);
    out = (unsigned char *) malloc(w);

    /* Prime the pipeline with the first two rows. */
    mdjvu_bitmap_unpack_row_0_or_1(bmp, current, 0);
    mdjvu_bitmap_unpack_row_0_or_1(bmp, below, 1);

    /* `next' is the row being loaded; the row being processed is next - 1. */
    for (next = 2; next < h; next++)
    {
        /* Rotate the three buffers; the old `above' is reused for loading. */
        unsigned char *spare = above;
        above = current;
        current = below;
        below = spare;

        mdjvu_bitmap_unpack_row_0_or_1(bmp, below, next);
        get_erosion_candidates_in_a_row(out, above, current, below, w);
        mdjvu_bitmap_pack_row(result, out, next - 1);
    }

    free(above);
    free(current);
    free(below);
    free(out);

    return result;
}
/*
 * Smooth a bitmap in place, one row at a time.
 *
 * Every row is passed to smooth_row() together with the row above and the row
 * below it; the output row is packed back into `b'.  Outside the bitmap the
 * neighbouring rows are all zeros.  Each of the three working rows is
 * allocated with one extra zeroed byte on either side (calloc(w + 2) with the
 * pointer advanced by one).
 *
 * Row y + 1 is always unpacked before row y is overwritten, so smooth_row()
 * only ever sees original pixel data.
 *
 * Bitmaps with fewer than three rows are left untouched.
 *
 * b - bitmap to smooth (modified)
 */
MDJVU_IMPLEMENT void mdjvu_smooth(mdjvu_bitmap_t b)
{
    int32 w = mdjvu_bitmap_get_width(b);
    int32 h = mdjvu_bitmap_get_height(b);
    unsigned char *above, *here, *below, *out;
    int32 y;

    if (h < 3)
        return;

    /* Working rows: usable part starts one byte into each allocation. */
    above = (unsigned char *) calloc(w + 2, 1) + 1;
    here = (unsigned char *) calloc(w + 2, 1) + 1;
    below = (unsigned char *) calloc(w + 2, 1) + 1;
    out = (unsigned char *) malloc(w);

    /* Row 0 starts as the "below" row; the first rotation makes it current. */
    mdjvu_bitmap_unpack_row_0_or_1(b, below, 0);

    for (y = 0; y < h; y++)
    {
        unsigned char *recycled = above;
        above = here;
        here = below;
        below = recycled;

        if (y + 1 >= h)
            memset(below, 0, w);    /* past the last row: blank neighbour */
        else
            mdjvu_bitmap_unpack_row_0_or_1(b, below, y + 1);

        smooth_row(out, above, here, below, w);
        mdjvu_bitmap_pack_row(b, out, y);
    }

    /* Undo the +1 offset before handing the pointers back to free(). */
    free(above - 1);
    free(here - 1);
    free(below - 1);
    free(out);
}
/*
 * Reorder the letter blits of `img'.
 *
 * Only blits whose bitmap has the not-a-letter flag clear take part.  For each
 * of them a BlitPassport (bounding box plus current blit index) is recorded.
 * The passports are sorted by top edge, cut into groups that approximate text
 * lines, and each group is sorted by left edge.  Finally the blits are
 * permuted in the image with mdjvu_image_exchange_blits() so that the blit
 * described by bps[i] is exchanged into index i.
 *
 * If the image has no not-a-letter flags yet they are calculated first.
 * Nothing else is done when fewer than two letter blits exist.
 *
 * NOTE(review): the three malloc() results are used without a NULL check.
 * NOTE(review): the permutation exchanges letter blits into indices
 * 0 .. char_blit_count - 1, so non-letter blits that were there get moved;
 * presumably mdjvu_image_exchange_blits() swaps the two blits - confirm.
 */
MDJVU_IMPLEMENT void mdjvu_sort_blits(mdjvu_image_t img)
{
    /* We're going to sort only blits whose bitmap is not flagged
     * as `not_a_letter'.
     */
    int32 char_blit_count = 0;
    int32 blit_count, i, j, maxtopchange, ccno;
    BlitPassport *bps;
    int32 *bottoms, *passport_of_blit;

    if (!mdjvu_image_has_not_a_letter_flags(img))
        mdjvu_calculate_not_a_letter_flags(img);

    /* Count letter blits */
    blit_count = mdjvu_image_get_blit_count(img);
    for (i = 0; i < blit_count; i++)
    {
        mdjvu_bitmap_t bmp = mdjvu_image_get_blit_bitmap(img, i);
        if (!mdjvu_image_get_not_a_letter_flag(img, bmp))
            char_blit_count++;
    }

    /* Zero or one letter blit: nothing to reorder (and nothing allocated yet). */
    if (char_blit_count < 2) return;

    /* Allocate `bps' and `bottoms' arrays (one slot per letter blit) */
    bps = (BlitPassport *) malloc(char_blit_count * sizeof(BlitPassport));
    bottoms = (int32 *) malloc(char_blit_count * sizeof(int32));

    /* Fill in `bps' with character blit passports.
     * right/bottom are inclusive coordinates (hence the -1).
     */
    j = 0;
    for (i = 0; i < blit_count; i++)
    {
        mdjvu_bitmap_t bmp = mdjvu_image_get_blit_bitmap(img, i);
        if (!mdjvu_image_get_not_a_letter_flag(img, bmp))
        {
            int32 x = bps[j].left = mdjvu_image_get_blit_x(img, i);
            int32 y = bps[j].top  = mdjvu_image_get_blit_y(img, i);;
            bps[j].right  = x + mdjvu_bitmap_get_width(bmp) - 1;
            bps[j].bottom = y + mdjvu_bitmap_get_height(bmp) - 1;
            bps[j].original_index = i;
            j++;
        }
    }

    /* Sort the BlitPassports list in top-to-bottom order. */
    qsort(bps, char_blit_count, sizeof(BlitPassport), &compare_top_edges_downward);

    /* Subdivide the passport list roughly into text lines [LYB] */

    /* Determine maximal top deviation: 1/40 of the image width,
     * but never less than 32.
     */
    maxtopchange = mdjvu_image_get_width(img) / 40;
    if (maxtopchange < 32) maxtopchange = 32;

    /* Loop until all passports are processed */
    ccno = 0;
    while (ccno < char_blit_count) /* ccno strictly increases every pass */
    {
        /* Gather first line approximation, starting at passport `ccno' */
        int32 sublist_top = bps[ccno].top;
        int32 sublist_bottom = bps[ccno].bottom;
        int32 nccno;

        /* Extend the candidate line while the next passport starts neither
         * below the lowest bottom seen so far nor more than `maxtopchange'
         * below the first top.  The bottoms are collected for the median.
         *
         * nccno will be at least ccno + 1 (the first passport always
         * passes both tests as long as its top does not exceed its bottom),
         * or otherwise we're hung.
         */
        for (nccno = ccno; nccno < char_blit_count; nccno++)
        {
            int32 bottom;
            if (bps[nccno].top > sublist_bottom) break;
            if (bps[nccno].top > sublist_top + maxtopchange) break;
            bottom = bps[nccno].bottom;
            bottoms[nccno - ccno] = bottom;
            if (bottom > sublist_bottom)
                sublist_bottom = bottom;
        }

        /* If more than one candidate for the line */
        if (nccno > ccno + 1)
        {
            /* Compute median bottom: sort the collected bottoms and take
             * the element at index (count - 1) / 2.
             */
            int32 bottom;
            qsort(bottoms, nccno - ccno, sizeof(int32), &compare_integers_reversed);
            bottom = bottoms[ (nccno - ccno - 1) / 2 ];

            /* Compose final line: every passport, from `ccno' on, whose top
             * is not below the median bottom.
             */
            for (nccno = ccno; nccno < char_blit_count; nccno++)
                if (bps[nccno].top > bottom)
                    break;

            /* Sort final line by left edge */
            qsort(bps + ccno, nccno - ccno, sizeof(BlitPassport), &compare_left_edges_rightward);
        }

        /* Next line */
        ccno = nccno;
    }

    /* Permute the blits according to `bps'.
     * passport_of_blit[k] is the index in `bps' of the passport describing
     * the blit currently at index k, or -1 if there is none.
     */
    passport_of_blit = (int32 *) malloc(blit_count * sizeof(int32));
    for (i = 0; i < blit_count; i++)
        passport_of_blit[i] = -1;
    for (i = 0; i < char_blit_count; i++)
        passport_of_blit[bps[i].original_index] = i;

    /* We'll maintain that bps[i].original_index points to the same blit
     * even after that blit has been moved by an exchange.
     */
    for (i = 0; i < char_blit_count; i++)
    {
        int32 blit_to_put_here = bps[i].original_index;
        mdjvu_image_exchange_blits(img, blit_to_put_here, i);

        /* The blit that used to sit at index i now lives at
         * `blit_to_put_here'; update its passport (if any) and the
         * reverse map accordingly.
         */
        if (passport_of_blit[i] != -1)
            bps[passport_of_blit[i]].original_index = blit_to_put_here;
        passport_of_blit[blit_to_put_here] = passport_of_blit[i];
    }

    free(passport_of_blit);
    free(bps);
    free(bottoms);
}
/*
 * Extract shapes from `bitmap' and add them to `image'.
 *
 * A window of `max_shape_size' rows is moved down the bitmap one row at a
 * time.  At every position process_row() is called for the topmost window
 * row; once the window reaches the bottom of the bitmap, process_row() is
 * called for each remaining row of the last window.
 *
 * image        - image passed on to process_row()
 * bitmap       - source bitmap (read with mdjvu_bitmap_unpack_row())
 * dpi          - used as max_shape_size when `opt' gives none; also
 *                forwarded to process_row()
 * opt          - if non-NULL, the int32 it points to is read as the maximal
 *                shape size (0 means "use dpi"); also forwarded
 * blit_shift_x,
 * blit_shift_y - forwarded to process_row() unchanged
 * big          - forwarded to process_row() unchanged
 *
 * max_shape_size is clamped to the bitmap height.
 *
 * NOTE(review): if max_shape_size ends up 0 (dpi == 0 with no size in `opt',
 * or a bitmap of height 0), `y % max_shape_size' below divides by zero and
 * window row 0 is never bound - presumably callers never pass such input;
 * confirm.
 * NOTE(review): the malloc() result is used without a NULL check.
 */
static void add_to_image(mdjvu_image_t image, mdjvu_bitmap_t bitmap, int32 dpi, mdjvu_split_options_t opt, int32 blit_shift_x, int32 blit_shift_y, int big)
{
    int32 max_shape_size = opt ? * (int32 *) opt : 0;
    int32 width = mdjvu_bitmap_get_width(bitmap);
    int32 height = mdjvu_bitmap_get_height(bitmap);
    unsigned char **buf, **window_base, **window_buf, **map;
    int32 window_shift = 0, y = 0, i;

    if (!max_shape_size)
        max_shape_size = dpi;
    if (max_shape_size > height)
        max_shape_size = height;

    /* n-th line will be unpacked into buf[n % max_shape_size] + 1.
     * ( +1 is to make the left margin)
     * buf[max_shape_size] will always be blank.
     *
     * window_base[window_shift - 1]
     *      points to buf[max_shape_size] + 1 (blank line) - top margin
     * window_base[window_shift + max_shape_size]
     *      points to buf[max_shape_size] + 1 (blank line) - bottom margin
     * window_base[window_shift + i]
     *      points to buf[(window_shift + i) % max_shape_size] + 1.
     *
     * (The two margin entries hold the blank line only while process_row()
     *  runs inside the window-moving loop; see the save/restore there.)
     */

    /* map has the right margin of 1 */
    map = mdjvu_create_2d_array(width + 1, max_shape_size);

    /* buf has left, right and bottom margins of 1 */
    buf = mdjvu_create_2d_array(width + 2, max_shape_size + 1);

    /* The row-pointer table holds the ring of buffer rows twice in a row, so
     * that a window starting at any window_shift is contiguous.  It is
     * allocated with 2 * (max_shape_size + 2) entries; window_base skips the
     * first one so that index -1 is valid.
     */
    window_buf = (unsigned char **) malloc(2 * (max_shape_size + 2) * sizeof(unsigned char *));
    window_base = window_buf + 1;

    /* Unpack initial portion of the bitmap; bind the window to the buffer */
    for (i = 0; i < max_shape_size; i++)
    {
        window_base[i] = window_base[max_shape_size + i] = buf[i] + 1;
        mdjvu_bitmap_unpack_row(bitmap, buf[i] + 1, i);
    }

    /* Setup top and bottom white margins.
     * Index 2 * max_shape_size - 1 was just bound by the loop above; within
     * this function it is only addressed as the bottom margin (when
     * window_shift == max_shape_size - 1), so overwriting it loses nothing.
     */
    window_base[-1] = window_base[2 * max_shape_size - 1] = buf[max_shape_size] + 1;

    /* The "window moving" loop.
     * We're moving a (width x max_shape_size) window through the image.
     */
    while(1)
    {
        /* Extract some shapes from the window
         * (shapes touching the topmost row will be affected).
         */
        unsigned char *top_margin_save = /* make the top margin */
            window_base[window_shift - 1]; /* (save what was there) */
        unsigned char *bot_margin_save = /* same with the bottom margin */
            window_base[window_shift + max_shape_size];
        int32 old_window_shift;

        /* Point both margin entries at the blank line while processing */
        window_base[window_shift - 1] = buf[max_shape_size] + 1;
        window_base[window_shift + max_shape_size] = buf[max_shape_size] + 1;

        process_row(window_base + window_shift, map,
                    /* index of a row to process: */ 0,
                    width, max_shape_size, image,
                    0, y, max_shape_size,
                    blit_shift_x, blit_shift_y, dpi, opt, big);

        /* Restore margins: those entries are real window rows again
         * for other values of window_shift.
         */
        window_base[window_shift - 1] = top_margin_save;
        window_base[window_shift + max_shape_size] = bot_margin_save;

        /* Shift the window */
        y++;
        old_window_shift = window_shift;
        window_shift = y % max_shape_size;

        /* Stop once the window would extend past the last bitmap row */
        if (y + max_shape_size > height)
            break;

        /* Unpack a new row into the bottom window row
         * (it reuses the buffer row that was the top row a moment ago).
         */
        mdjvu_bitmap_unpack_row(bitmap, buf[old_window_shift] + 1, y + max_shape_size - 1);
    }

    /* Process the last window fully.
     * NOTE(review): the margin entries are not pointed at the blank line
     * here; presumably this relies on the previously processed top row
     * having been cleared by process_row() - confirm.
     */
    for (i = 0; y + i < height; i++)
    {
        process_row(window_base + window_shift, map,
                    /* index of a row to process: */ i,
                    width, max_shape_size, image,
                    0, y, max_shape_size,
                    blit_shift_x, blit_shift_y, dpi, opt, big);
    }

    /* Clean up */
    free(window_buf);
    mdjvu_destroy_2d_array(map);
    mdjvu_destroy_2d_array(buf);
}
/* Margins (1 pixel) from each side are required. */ static void process_row(unsigned char **pixels, unsigned char **map, int32 y, int32 w, int32 h, mdjvu_image_t image, int32 shift_x, int32 shift_y, int32 max_shape_width, int32 blit_shift_x, int32 blit_shift_y, int32 dpi, mdjvu_split_options_t opt, int big) { unsigned char *row = pixels[y]; int32 i; for (i = 0; i < w; i++) { if (row[i]) { int32 min_x, max_x, min_y, max_y, shape_width; /* extract the contour */ mdjvu_bitmap_t bitmap; walk_around_a_black_contour(pixels, map, i, y, &min_x, &max_x, &min_y, &max_y); bitmap = interpret_runs(min_x, max_x, min_y, max_y, map, pixels); shape_width = mdjvu_bitmap_get_width(bitmap); assert(shape_width == max_x - min_x + 1); assert(mdjvu_bitmap_get_height(bitmap) == max_y - min_y + 1); if (shape_width <= max_shape_width) { mdjvu_image_add_bitmap(image, bitmap); mdjvu_image_add_blit(image, shift_x + min_x + blit_shift_x, y + shift_y + blit_shift_y, bitmap); mdjvu_image_set_suspiciously_big_flag(image, bitmap, big); } else { /* further split the bitmap */ int32 number_of_chunks = (shape_width + max_shape_width - 1) / max_shape_width; int32 j; int32 shape_height = mdjvu_bitmap_get_height(bitmap); for (j = 0; j < number_of_chunks; j++) { int32 chunk_x = shape_width * j / number_of_chunks; mdjvu_bitmap_t chunk = mdjvu_bitmap_crop(bitmap, chunk_x, 0, shape_width * (j+1) / number_of_chunks - chunk_x, shape_height ); /* After splitting, some white margins may be left, * or the bitmap may lose connectivity. * Apply the algorithm recursively to the chunk. */ add_to_image(image, chunk, dpi, opt, shift_x + chunk_x + min_x + blit_shift_x, y + shift_y + blit_shift_y, /* big: */ 1); mdjvu_bitmap_destroy(chunk); } mdjvu_bitmap_destroy(bitmap); } /* if (shape_width <= max_shape_width) */ } } }
/*
 * Average several bitmaps into one by per-pixel majority vote.
 *
 * The bitmaps are aligned on their centers: cx[i] and cy[i], divided by
 * MDJVU_CENTER_QUANT, give the center of bitmaps[i] in pixels (asserted to
 * lie inside that bitmap).  A canvas large enough to hold all aligned bitmaps
 * is used to count, for every pixel, how many bitmaps are black there.  A
 * result pixel is black when that count is greater than n / 2 (integer
 * division).  Margins are then removed from the result with
 * mdjvu_bitmap_remove_margins(); the offsets it reports are discarded.
 *
 * bitmaps - array of `n' bitmaps (not modified)
 * n       - number of bitmaps; for n == 1 a clone of bitmaps[0] is returned
 * cx, cy  - per-bitmap center coordinates in MDJVU_CENTER_QUANT units
 *
 * Returns a newly created bitmap.
 */
MDJVU_IMPLEMENT mdjvu_bitmap_t mdjvu_average(mdjvu_bitmap_t *bitmaps, int32 n, int32 *cx, int32 *cy)
{
    /* Canvas extents relative to the common center (end_* are exclusive). */
    int32 min_x = 0, min_y = 0;
    int32 end_x = 0, end_y = 0;
    int32 canvas_w, canvas_h;
    int32 *votes;
    unsigned char *row;
    int32 majority = n / 2;
    int32 margin_x, margin_y;
    mdjvu_bitmap_t result;
    int32 k, x, y;

    if (n == 1)
        return mdjvu_bitmap_clone(bitmaps[0]);

    /* Pass 1: find the bounding box of all center-aligned bitmaps. */
    for (k = 0; k < n; k++)
    {
        int32 w = mdjvu_bitmap_get_width(bitmaps[k]);
        int32 h = mdjvu_bitmap_get_height(bitmaps[k]);
        int32 ncx = cx[k] / MDJVU_CENTER_QUANT;
        int32 ncy = cy[k] / MDJVU_CENTER_QUANT;

        assert(ncx >= 0 && ncx < w);
        assert(ncy >= 0 && ncy < h);

        if (min_x > -ncx)
            min_x = -ncx;
        if (min_y > -ncy)
            min_y = -ncy;
        if (end_x < w - ncx)
            end_x = w - ncx;
        if (end_y < h - ncy)
            end_y = h - ncy;
    }

    canvas_w = end_x - min_x;
    canvas_h = end_y - min_y;
    votes = (int32 *) calloc(canvas_w * canvas_h, sizeof(int32));
    row = (unsigned char *) malloc(canvas_w);

    /* Pass 2: count, per canvas pixel, how many bitmaps are black there. */
    for (k = 0; k < n; k++)
    {
        int32 w = mdjvu_bitmap_get_width(bitmaps[k]);
        int32 h = mdjvu_bitmap_get_height(bitmaps[k]);
        /* sx/sy are <= 0: minus the bitmap's offset inside the canvas. */
        int32 sx = min_x + cx[k] / MDJVU_CENTER_QUANT;
        int32 sy = min_y + cy[k] / MDJVU_CENTER_QUANT;

        for (y = 0; y < h; y++)
        {
            int32 *vote_row = votes + canvas_w * (y - sy);

            mdjvu_bitmap_unpack_row(bitmaps[k], row, y);
            for (x = 0; x < w; x++)
            {
                if (row[x])
                    vote_row[x - sx]++;
            }
        }
    }

    /* Pass 3: threshold the counts into the result bitmap. */
    result = mdjvu_bitmap_create(canvas_w, canvas_h);
    for (y = 0; y < canvas_h; y++)
    {
        int32 *vote_row = votes + y * canvas_w;

        for (x = 0; x < canvas_w; x++)
            row[x] = (unsigned char) (vote_row[x] > majority);

        mdjvu_bitmap_pack_row(result, row, y);
    }

    mdjvu_bitmap_remove_margins(result, &margin_x, &margin_y);

    free(row);
    free(votes);

    return result;
}