Beispiel #1
0
/*
 * Initializes the match context ctx for the data held in inbuf.
 *
 * ctx         IN/OUT  context to fill in; its info, rle and rle_r arrays are
 *                     allocated here (buf_len + 1 zeroed elements each) and
 *                     its m_pool chunk pool is initialized.
 * inbuf       IN      buffer to analyze. ctx->buf keeps pointing at its data,
 *                     so the data is borrowed, not copied.
 * max_offset  IN      stored as-is in ctx->max_offset.
 *
 * Work done, in order:
 *   1. rle[i]   = number of equal bytes directly preceding position i
 *                 (restarts from 0 once it would exceed 65535).
 *      rle_r[i] = number of following positions over which rle keeps
 *                 strictly increasing, i.e. what is left of the run.
 *   2. For each byte value, chains of match_node entries are built and
 *      attached to info[i]->single.
 *   3. matches_calc() is called for every position, last to first, and the
 *      result is stored in info[i]->cache.
 *
 * If any of the three arrays cannot be allocated, an error is logged and the
 * process is terminated with exit(EXIT_FAILURE); the function has no return
 * value through which the failure could be reported.
 */
void match_ctx_init(match_ctx ctx,      /* IN/OUT */
                    struct membuf *inbuf,   /* IN */
                    int max_offset)
{
    struct match_node *np;
    struct progress prog[1];

    int buf_len = membuf_memlen(inbuf);
    const unsigned char *buf = membuf_get(inbuf);

    int c, i;

    ctx->info = calloc(buf_len + 1, sizeof(*ctx->info));
    ctx->rle = calloc(buf_len + 1, sizeof(*ctx->rle));
    ctx->rle_r = calloc(buf_len + 1, sizeof(*ctx->rle_r));
    if (ctx->info == NULL || ctx->rle == NULL || ctx->rle_r == NULL)
    {
        /* every loop below indexes these arrays, so there is no way on */
        LOG(LOG_ERROR, ("out of memory in match_ctx_init, buffer length %d\n",
                        buf_len));
        exit(EXIT_FAILURE);
    }

    chunkpool_init(ctx->m_pool, sizeof(match));

    ctx->max_offset = max_offset;

    ctx->buf = buf;
    ctx->len = buf_len;

    /* forward pass: length of the run of equal bytes ending just before i.
     * Comparing against buf[i - 1] keeps an empty buffer from being read. */
    for (i = 1; i < buf_len; ++i)
    {
        if (buf[i] == buf[i - 1])
        {
            int len = ctx->rle[i - 1] + 1;
            if(len > 65535)
            {
                /* restart the count instead of exceeding 65535 */
                len = 0;
            }
            ctx->rle[i] = len;
        } else
        {
            ctx->rle[i] = 0;
        }
    }

    /* backward pass: how much further the run continues after i */
    for (i = buf_len - 2; i >= 0; --i)
    {
        if (ctx->rle[i] < ctx->rle[i + 1])
        {
            ctx->rle_r[i] = ctx->rle_r[i + 1] + 1;
        } else
        {
            ctx->rle_r[i] = 0;
        }
    }

    /* add extra nodes to rle sequences */
    for(c = 0; c < 256; ++c)
    {
        /* one flag per possible run length; static, so the 64 KiB table
         * is not placed on the stack (it also makes this function
         * non-reentrant) */
        static char rle_map[65536];
        struct match_node *prev_np;
        /* unsigned short keeps every index used on rle_map inside it */
        unsigned short int rle_len;

        /* for each possible rle char */
        memset(rle_map, 0, sizeof(rle_map));
        prev_np = NULL;
        /* first pass, front to back: create nodes and link each one to
         * the next node created for the same byte value */
        for (i = 0; i < buf_len; ++i)
        {
            /* must be the correct char */
            if(buf[i] != c)
            {
                continue;
            }

            rle_len = ctx->rle[i];
            if(!rle_map[rle_len] && ctx->rle_r[i] > 16)
            {
                /* no previous lengths and not our primary length*/
                continue;
            }

            np = chunkpool_malloc(ctx->m_pool);
            np->index = i;
            np->next = NULL;
            rle_map[rle_len] = 1;

            LOG(LOG_DUMP, ("0) c = %d, added np idx %d -> %d\n", c, i, 0));

            /* if we have a previous entry, let's chain it together */
            if(prev_np != NULL)
            {
                LOG(LOG_DUMP, ("1) c = %d, pointed np idx %d -> %d\n",
                                c, prev_np->index, i));
                prev_np->next = np;
            }

            ctx->info[i]->single = np;
            prev_np = np;
        }

        memset(rle_map, 0, sizeof(rle_map));
        prev_np = NULL;
        /* second pass, back to front: give some of the positions skipped
         * above a node that points at the closest later first-pass node */
        for (i = buf_len - 1; i >= 0; --i)
        {
            /* must be the correct char */
            if(buf[i] != c)
            {
                continue;
            }

            rle_len = ctx->rle_r[i];
            np = ctx->info[i]->single;
            if(np == NULL)
            {
                /* only inside a run (rle_len > 0) whose remaining length
                 * has been flagged by a run end seen later in the buffer */
                if(rle_map[rle_len] && prev_np != NULL && rle_len > 0)
                {
                    np = chunkpool_malloc(ctx->m_pool);
                    np->index = i;
                    np->next = prev_np;
                    ctx->info[i]->single = np;

                    LOG(LOG_DEBUG, ("2) c = %d, added np idx %d -> %d\n",
                                    c, i, prev_np->index));
                }
            }
            else
            {
                /* only nodes from the first pass become link targets */
                prev_np = np;
            }

            if(ctx->rle_r[i] > 0)
            {
                continue;
            }
            /* i is the last position of its run: flag the run's length */
            rle_len = ctx->rle[i] + 1;
            rle_map[rle_len] = 1;
        }
    }

    progress_init(prog, "building.directed.acyclic.graph.", buf_len - 1, 0);

    for (i = buf_len - 1; i >= 0; --i)
    {
        const_matchp matches;

        /* let's populate the cache */
        matches = matches_calc(ctx, i);

        /* add to cache */
        ctx->info[i]->cache = matches;

        progress_bump(prog, i);
    }

    LOG(LOG_NORMAL, ("\n"));

    progress_free(prog);
}
/*
 * Walks the buffer described by ctx from its end towards its start and
 * records, for every position, the cheapest known way to continue from
 * there according to the scoring callback f.
 *
 * ctx                    IN  match context; its len, rle and rle_r members
 *                            are read and matches_get() is queried for
 *                            every position.
 * f                      IN  scoring callback, called as f(match, emd,
 *                            buckets-or-NULL); a lower score wins.
 * emd                    IN  passed through to f unchanged.
 * use_literal_sequences  IN  non-zero enables the check that replaces a
 *                            stretch of matches with one literal copy.
 *
 * Returns a zero-initialized-then-filled array of ctx->len + 1 search nodes
 * allocated with calloc(), or NULL if that allocation fails. Node i holds
 * the chosen match starting at position i, the accumulated score and offset
 * sum, and prev pointing at the node (at a higher index) the match leads to.
 * The array is not freed here; presumably the caller releases it with
 * free() -- TODO confirm against the callers.
 */
struct search_node*
search_buffer(match_ctx ctx,       /* IN */
              encode_match_f * f,  /* IN */
              encode_match_data emd,       /* IN */
              int use_literal_sequences)
{
    struct progress prog[1];
    struct search_node *sn_arr;
    const_matchp mp = NULL;
    struct search_node *snp;
    struct search_node *best_copy_snp;
    int best_copy_len;

    struct search_node *best_rle_snp;

    int len = ctx->len + 1;

    /* allocate before starting the progress meter so that the failure
     * path has nothing to tear down */
    sn_arr = calloc(len, sizeof(struct search_node));
    if(sn_arr == NULL)
    {
        LOG(LOG_ERROR, ("out of memory allocating %d search nodes\n", len));
        return NULL;
    }

    progress_init(prog, "finding.shortest.path.",len, 0);

    /* the node one past the last byte is the zero-cost starting point */
    --len;
    snp = &sn_arr[len];
    snp->index = len;
    snp->match->offset = 0;
    snp->match->len = 0;
    snp->total_offset = 0;
    snp->total_score = 0;
    snp->prev = NULL;

    best_copy_snp = snp;
    best_copy_len = 0;

    best_rle_snp = NULL;

    /* think twice about changing this code,
     * it works the way it is. The last time
     * I examined this code I was certain it was
     * broken and broke it myself, trying to fix it. */
    while (len > 0 && (mp = matches_get(ctx, len - 1)) != NULL)
    {
        float prev_score;
        float prev_offset_sum;

        if(use_literal_sequences)
        {
            /* check if we can do even better with copy */
            snp = &sn_arr[len];
            if(best_copy_snp->total_score+best_copy_len * 8.0 -
               snp->total_score > 0.0 || best_copy_len > 65535)
            {
                /* found a better copy endpoint */
                LOG(LOG_DEBUG,
                    ("best copy start moved to index %d\n", snp->index));
                best_copy_snp = snp;
                best_copy_len = 0;
            } else
            {
                /* 8.0 per copied byte plus a fixed overhead of 35.0;
                 * presumably both are bit counts -- TODO confirm */
                float copy_score = best_copy_len * 8.0 + (1.0 + 17.0 + 17.0);
                float total_copy_score = best_copy_snp->total_score +
                                         copy_score;

                LOG(LOG_DEBUG,
                    ("total score %0.1f, copy total score %0.1f\n",
                     snp->total_score, total_copy_score));

                if(snp->total_score > total_copy_score )
                {
                    match local_mp;
                    /* here it is good to just copy instead of crunch */

                    LOG(LOG_DEBUG,
                        ("copy index %d, len %d, total %0.1f, copy %0.1f\n",
                         snp->index, best_copy_len,
                         snp->total_score, total_copy_score));

                    local_mp->len = best_copy_len;
                    local_mp->offset = 0;
                    local_mp->next = NULL;
                    snp->total_score = total_copy_score;
                    snp->total_offset = best_copy_snp->total_offset;
                    snp->prev = best_copy_snp;
                    *snp->match = *local_mp;
                }
            }
            /* end of copy optimization */
        }

        /* check if we can do rle */
        snp = &sn_arr[len];
        if(best_rle_snp == NULL ||
           snp->index + 65535 < best_rle_snp->index ||
           snp->index + ctx->rle_r[snp->index] < best_rle_snp->index)
        {
            /* best_rle_snp can't be reached by rle from snp, reset it*/
            if(ctx->rle[snp->index] > 0)
            {
                best_rle_snp = snp;
                LOG(LOG_DEBUG, ("resetting best_rle at index %d, len %d\n",
                                 snp->index, ctx->rle[snp->index]));
            }
            else
            {
                best_rle_snp = NULL;
            }
        }
        else if(ctx->rle[snp->index] > 0 &&
                snp->index + ctx->rle_r[snp->index] >= best_rle_snp->index)
        {
            float best_rle_score;
            float total_best_rle_score;
            float snp_rle_score;
            float total_snp_rle_score;
            match rle_mp;

            LOG(LOG_DEBUG, ("challenger len %d, index %d, "
                             "ruling len %d, index %d\n",
                             ctx->rle_r[snp->index], snp->index,
                             ctx->rle_r[best_rle_snp->index],
                             best_rle_snp->index));

            /* snp and best_rle_snp is the same rle area,
             * let's see which is best */
            /* scratch match handed to f; give every member a defined value */
            rle_mp->next = NULL;
            rle_mp->len = ctx->rle[best_rle_snp->index];
            rle_mp->offset = 1;
            best_rle_score = f(rle_mp, emd, NULL);
            total_best_rle_score = best_rle_snp->total_score +
                best_rle_score;

            rle_mp->len = ctx->rle[snp->index];
            rle_mp->offset = 1;
            snp_rle_score = f(rle_mp, emd, NULL);
            total_snp_rle_score = snp->total_score + snp_rle_score;

            if(total_snp_rle_score <= total_best_rle_score)
            {
                /* yes, the snp is a better rle than best_rle_snp */
                LOG(LOG_DEBUG, ("prospect len %d, index %d, (%0.1f+%0.1f) "
                                 "ruling len %d, index %d (%0.1f+%0.1f)\n",
                                 ctx->rle[snp->index], snp->index,
                                 snp->total_score, snp_rle_score,
                                 ctx->rle[best_rle_snp->index],
                                 best_rle_snp->index,
                                 best_rle_snp->total_score, best_rle_score));
                best_rle_snp = snp;
                LOG(LOG_DEBUG, ("setting current best_rle: "
                                 "index %d, len %d\n",
                                 snp->index, rle_mp->len));
            }
        }
        if(best_rle_snp != NULL && best_rle_snp != snp)
        {
            float rle_score;
            float total_rle_score;
            /* check if rle is better */
            match local_mp;
            local_mp->len = best_rle_snp->index - snp->index;
            local_mp->offset = 1;
            /* this match may be copied into the node below, so its link
             * must not be left indeterminate (the copy branch above sets
             * it the same way) */
            local_mp->next = NULL;

            rle_score = f(local_mp, emd, NULL);
            total_rle_score = best_rle_snp->total_score + rle_score;

            LOG(LOG_DEBUG, ("comparing index %d (%0.1f) with "
                             "rle index %d, len %d, total score %0.1f %0.1f\n",
                             snp->index, snp->total_score,
                             best_rle_snp->index, local_mp->len,
                             best_rle_snp->total_score, rle_score));

            if(snp->total_score > total_rle_score)
            {
                /*here it is good to do rle instead of crunch */
                LOG(LOG_DEBUG,
                    ("rle index %d, len %d, total %0.1f, rle %0.1f\n",
                     snp->index, local_mp->len,
                     snp->total_score, total_rle_score));

                snp->total_score = total_rle_score;
                snp->total_offset = best_rle_snp->total_offset + 1;
                snp->prev = best_rle_snp;

                *snp->match = *local_mp;
            }
        }
        /* end of rle optimization */

        LOG(LOG_DUMP,
            ("matches for index %d with total score %0.1f\n",
             len - 1, snp->total_score));

        /* relax every match that ends at position len: each candidate
         * length l updates the node at len - l */
        prev_score = sn_arr[len].total_score;
        prev_offset_sum = sn_arr[len].total_offset;
        while (mp != NULL)
        {
            matchp next;
            int end_len;
            match tmp;
            int bucket_len_start;
            float score;

            next = mp->next;
            end_len = 1;
            *tmp = *mp;
            tmp->next = NULL;
            bucket_len_start = 0;
            /* try the match at its full length and at every shorter one */
            for(tmp->len = mp->len; tmp->len >= end_len; --(tmp->len))
            {
                float total_score;
                unsigned int total_offset;
                struct encode_match_buckets match_buckets;

                LOG(LOG_DUMP, ("mp[%d, %d], tmp[%d, %d]\n",
                               mp->offset, mp->len,
                               tmp->offset, tmp->len));
                /* the score is recomputed on the first pass, for lengths
                 * below 3, and once the length drops below the start of
                 * the bucket reported by f; otherwise the previous score
                 * is reused */
                if (bucket_len_start == 0 ||
                    tmp->len < 3 ||
                    tmp->len < bucket_len_start)
                {
                    score = f(tmp, emd, &match_buckets);
                    bucket_len_start = match_buckets.len.start;
                }

                total_score = prev_score + score;
                total_offset = prev_offset_sum + tmp->offset;
                snp = &sn_arr[len - tmp->len];

                LOG(LOG_DUMP,
                    ("[%05d] cmp [%05d, %05d score %.1f + %.1f] with %.1f",
                     len, tmp->offset, tmp->len,
                     prev_score, score, snp->total_score));

                /* take the candidate if the node is still empty, if it is
                 * strictly cheaper, or on a tie when it is a literal
                 * (offset 0) or has the same length and no larger an
                 * accumulated offset */
                if ((total_score < 100000000.0) &&
                    (snp->match->len == 0 ||
                     total_score < snp->total_score ||
                     (total_score == snp->total_score &&
                      (tmp->offset == 0 ||
                       (snp->match->len == tmp->len &&
                        (total_offset <= snp->total_offset))))))
                {
                    LOG(LOG_DUMP, (", replaced"));
                    snp->index = len - tmp->len;

                    *snp->match = *tmp;
                    snp->total_offset = total_offset;
                    snp->total_score = total_score;
                    snp->prev = &sn_arr[len];
                }
                LOG(LOG_DUMP, ("\n"));
            }
            LOG(LOG_DUMP, ("tmp->len %d, ctx->rle[%d] %d\n",
                           tmp->len, len - tmp->len,
                           ctx->rle[len - tmp->len]));

            mp = next;
        }

        /* slow way to get to the next node for cur */
        --len;
        ++best_copy_len;
        /* NOTE(review): match is dereferenced right after the zero-fill at
         * the top of this function, so it looks like an embedded member
         * rather than a pointer and this comparison can presumably never
         * be true -- confirm whether match->len == 0 was intended. */
        if(sn_arr[len].match == NULL)
        {
            LOG(LOG_ERROR, ("Found unreachable node at len %d.\n", len));
        }

        progress_bump(prog, len);
    }
    if(len > 0 && mp == NULL)
    {
        LOG(LOG_ERROR, ("No matches at len %d.\n", len));
    }
    LOG(LOG_NORMAL, ("\n"));

    progress_free(prog);

    return sn_arr;
}