Пример #1
0
/*
 * Decide whether blits `b1' and `b2' of `image' are in contact.
 *
 * For each blit the horizontal extent is (blit x, bitmap width) and the
 * vertical extent is (blit y, bitmap height).  The two horizontal extents
 * and the two vertical extents are each handed to
 * segments_intersect_or_touch(); the blits are reported as being in contact
 * only when both checks succeed.
 *
 * Returns 1 if both checks succeed, 0 otherwise.
 */
static int blits_intersect_or_touch(mdjvu_image_t image, int32 b1, int32 b2)
{
    /* Geometry of the first blit */
    mdjvu_bitmap_t first_bmp = mdjvu_image_get_blit_bitmap(image, b1);
    int32 first_left   = mdjvu_image_get_blit_x(image, b1);
    int32 first_top    = mdjvu_image_get_blit_y(image, b1);
    int32 first_width  = mdjvu_bitmap_get_width(first_bmp);
    int32 first_height = mdjvu_bitmap_get_height(first_bmp);

    /* Geometry of the second blit */
    mdjvu_bitmap_t second_bmp = mdjvu_image_get_blit_bitmap(image, b2);
    int32 second_left   = mdjvu_image_get_blit_x(image, b2);
    int32 second_top    = mdjvu_image_get_blit_y(image, b2);
    int32 second_width  = mdjvu_bitmap_get_width(second_bmp);
    int32 second_height = mdjvu_bitmap_get_height(second_bmp);

    /* No horizontal contact: the vertical check is not needed. */
    if (!segments_intersect_or_touch(first_left, first_width,
                                     second_left, second_width))
        return 0;

    return segments_intersect_or_touch(first_top, first_height,
                                       second_top, second_height) ? 1 : 0;
}
Пример #2
0
/*
 * Enable the not-a-letter flags of `image' and compute them.
 *
 * Every blit whose bitmap carries the suspiciously-big flag is passed to
 * make_no_subst().
 *
 * Precondition (checked with assert): the image already has
 * suspiciously-big flags.
 */
MDJVU_IMPLEMENT void mdjvu_calculate_not_a_letter_flags(mdjvu_image_t image)
{
    int32 blit_index = 0;
    int32 blit_total;

    assert(mdjvu_image_has_suspiciously_big_flags(image));
    mdjvu_image_enable_not_a_letter_flags(image);

    blit_total = mdjvu_image_get_blit_count(image);
    while (blit_index < blit_total)
    {
        mdjvu_bitmap_t bmp = mdjvu_image_get_blit_bitmap(image, blit_index);

        if (mdjvu_image_get_suspiciously_big_flag(image, bmp))
            make_no_subst(image, blit_index);

        blit_index++;
    }
}
Пример #3
0
/*
 * Set the not-a-letter flag on the bitmap of blit `blit' and spread it,
 * recursively, to every blit that blits_intersect_or_touch() reports as
 * being in contact with it.
 *
 * The flag is stored per bitmap.  If the bitmap of `blit' already has the
 * flag, nothing is done; this is also what stops the recursion.
 */
static void make_no_subst(mdjvu_image_t image, int32 blit)
{
    mdjvu_bitmap_t bmp = mdjvu_image_get_blit_bitmap(image, blit);
    int32 other, blit_total;

    if (!mdjvu_image_get_not_a_letter_flag(image, bmp))
    {
        /* Flag first, so that recursive calls reaching this bitmap again
           return immediately. */
        mdjvu_image_set_not_a_letter_flag(image, bmp, 1);

        /* Propagate to all blits in contact with this one */
        blit_total = mdjvu_image_get_blit_count(image);
        other = 0;
        while (other < blit_total)
        {
            if (blits_intersect_or_touch(image, blit, other))
                make_no_subst(image, other);
            other++;
        }
    }
}
Пример #4
0
/*
 * Drop tiny blits from `image'.
 *
 * The mass threshold is dpi*dpi/20000 - 1, where dpi is the image
 * resolution.  Masses are always enabled on the image; if the threshold is
 * not positive, nothing else is done.
 *
 * Otherwise each blit whose bitmap has a mass not exceeding the threshold,
 * and whose bitmap is not flagged as suspiciously big, gets its bitmap set
 * to NULL.  NULL blits and unused bitmaps are then removed from the image.
 */
MDJVU_IMPLEMENT void mdjvu_clean(mdjvu_image_t image)
{
    int32 blit_total = mdjvu_image_get_blit_count(image);
    int32 resolution = mdjvu_image_get_resolution(image);
    int32 mass_limit = resolution * resolution / 20000 - 1;
    int32 k;

    mdjvu_image_enable_masses(image);

    if (mass_limit > 0)
    {
        for (k = 0; k < blit_total; k++)
        {
            mdjvu_bitmap_t bmp = mdjvu_image_get_blit_bitmap(image, k);
            int32 weight = mdjvu_image_get_mass(image, bmp);
            /* Blits produced by splitting larger shapes (such as
               horizontal rulers) must survive the cleanup. */
            int32 oversized = mdjvu_image_get_suspiciously_big_flag(image, bmp);

            if (oversized || weight > mass_limit)
                continue;

            mdjvu_image_set_blit_bitmap(image, k, NULL);
        }

        mdjvu_image_remove_NULL_blits(image);
        mdjvu_image_remove_unused_bitmaps(image);
    }
}
Пример #5
0
/*
 * Reorder the letter blits of `img' line by line.
 *
 * Only blits whose bitmap does NOT carry the not-a-letter flag take part.
 * If the image has no not-a-letter flags yet, they are computed first with
 * mdjvu_calculate_not_a_letter_flags().
 *
 * Steps:
 *   1. collect a BlitPassport (bounding box + original index) per letter blit;
 *   2. sort the passports with compare_top_edges_downward;
 *   3. cut the sorted list roughly into text lines and sort each line with
 *      compare_left_edges_rightward;
 *   4. exchange blits so that the sorted letter blits occupy indices
 *      0 .. char_blit_count - 1 of the image.
 *
 * Nothing is reordered when there are fewer than two letter blits, or when
 * the working arrays cannot be allocated (the blit order is then left as it
 * was).
 */
MDJVU_IMPLEMENT void mdjvu_sort_blits(mdjvu_image_t img)
{
    int32 char_blit_count = 0;
    int32 blit_count, i, j, maxtopchange, ccno;
    BlitPassport *bps;
    int32 *bottoms, *passport_of_blit;

    if (!mdjvu_image_has_not_a_letter_flags(img))
        mdjvu_calculate_not_a_letter_flags(img);

    /* Count letter blits */
    blit_count = mdjvu_image_get_blit_count(img);
    for (i = 0; i < blit_count; i++)
    {
        mdjvu_bitmap_t bmp = mdjvu_image_get_blit_bitmap(img, i);
        if (!mdjvu_image_get_not_a_letter_flag(img, bmp))
            char_blit_count++;
    }

    if (char_blit_count < 2) return;

    /* Allocate all working arrays up front so that an allocation failure
     * is detected before any blit has been moved.
     * (The casts are kept so the code also compiles as C++.)
     */
    bps = (BlitPassport *) malloc(char_blit_count * sizeof(BlitPassport));
    bottoms = (int32 *) malloc(char_blit_count * sizeof(int32));
    passport_of_blit = (int32 *) malloc(blit_count * sizeof(int32));
    if (!bps || !bottoms || !passport_of_blit)
    {
        /* Out of memory: leave the blit order untouched. free(NULL) is OK. */
        free(passport_of_blit);
        free(bps);
        free(bottoms);
        return;
    }

    /* Fill in `bps' with character blit passports */
    j = 0;
    for (i = 0; i < blit_count; i++)
    {
        mdjvu_bitmap_t bmp = mdjvu_image_get_blit_bitmap(img, i);
        if (!mdjvu_image_get_not_a_letter_flag(img, bmp))
        {
            int32 x = bps[j].left = mdjvu_image_get_blit_x(img, i);
            int32 y = bps[j].top  = mdjvu_image_get_blit_y(img, i);
            bps[j].right  = x + mdjvu_bitmap_get_width(bmp)  - 1;
            bps[j].bottom = y + mdjvu_bitmap_get_height(bmp) - 1;
            bps[j].original_index = i;
            j++;
        }
    }

    /* Sort the BlitPassports list in top-to-bottom order. */
    qsort(bps, char_blit_count, sizeof(BlitPassport),
          &compare_top_edges_downward);

    /* Subdivide the ccarray list roughly into text lines [LYB] */
    /* Determine maximal top deviation: 1/40 of the image width,
     * but never less than 32.
     */
    maxtopchange = mdjvu_image_get_width(img) / 40;
    if (maxtopchange < 32) maxtopchange = 32;

    /* Loop until processing all ccs */
    ccno = 0;
    while (ccno < char_blit_count)  /* ccno strictly increases each pass */
    {
        /* Gather first line approximation */
        int32 sublist_top    = bps[ccno].top;
        int32 sublist_bottom = bps[ccno].bottom;

        int32 nccno;

        /* Take blits while their top stays within the current line's
         * vertical span and within `maxtopchange' of the line's top;
         * record their bottoms for the median computation below.
         */
        for (nccno = ccno; nccno < char_blit_count; nccno++)
        {
            int32 bottom;
            if (bps[nccno].top > sublist_bottom) break;
            if (bps[nccno].top > sublist_top + maxtopchange) break;
            bottom = bps[nccno].bottom;
            bottoms[nccno - ccno] = bottom;
            if (bottom > sublist_bottom)
                sublist_bottom = bottom;
        }

        /* If more than one candidate cc for the line */
        if (nccno > ccno + 1)
        {
            /* Compute median bottom */
            int32 bottom;
            qsort(bottoms, nccno - ccno, sizeof(int32),
                  &compare_integers_reversed);
            bottom = bottoms[ (nccno - ccno - 1) / 2 ];

            /* Compose final line: everything starting above the median */
            for (nccno = ccno; nccno < char_blit_count; nccno++)
                if (bps[nccno].top > bottom)
                    break;

            /* Sort final line */
            qsort(bps + ccno, nccno - ccno, sizeof(BlitPassport),
                  &compare_left_edges_rightward);
        }

        /* A blit whose bottom lies above its top (e.g. a zero-height
         * bitmap) can leave nccno == ccno.  Always consume at least one
         * blit, otherwise this loop would never terminate.
         */
        if (nccno <= ccno)
            nccno = ccno + 1;

        /* Next line */
        ccno = nccno;
    }

    /* Permute the blits according to `bps'.
     * passport_of_blit[k] is the index in `bps' of the passport describing
     * the blit currently at position k, or -1 for a non-letter blit.
     */
    for (i = 0; i < blit_count; i++)
        passport_of_blit[i] = -1;
    for (i = 0; i < char_blit_count; i++)
        passport_of_blit[bps[i].original_index] = i;

    /* We'll maintain that bps[i].original_index points to the same blit */
    for (i = 0; i < char_blit_count; i++)
    {
        int32 blit_to_put_here = bps[i].original_index;
        mdjvu_image_exchange_blits(img, blit_to_put_here, i);
        /* The blit that used to be at position i now sits at
         * blit_to_put_here; update its passport (if it has one) and the
         * reverse mapping accordingly.
         */
        if (passport_of_blit[i] != -1)
            bps[passport_of_blit[i]].original_index = blit_to_put_here;
        passport_of_blit[blit_to_put_here] = passport_of_blit[i];
    }

    free(passport_of_blit);
    free(bps);
    free(bottoms);
}