/*
 * Prepare a match context for the bytes held in inbuf.
 *
 * ctx        - IN/OUT: context to fill in. Its info, rle and rle_r arrays
 *              are allocated here (buf_len + 1 zeroed entries each) and its
 *              m_pool is initialised for match-sized chunks.
 * inbuf      - IN: buffer whose contents are analysed. ctx keeps a pointer
 *              into it (ctx->buf), so it must outlive the context.
 * max_offset - IN: stored verbatim in ctx->max_offset.
 *
 * Work done, in order:
 *   1. rle[i]   = number of bytes directly before i that equal buf[i]
 *                 (restarts at 0 once the count would exceed 65535).
 *   2. rle_r[i] = rle_r[i + 1] + 1 while rle[] keeps rising to the right of
 *                 i, otherwise 0.
 *   3. For every byte value, chain match_node entries through
 *      info[i]->single.
 *   4. Fill info[i]->cache with the result of matches_calc() for every
 *      index, from the last byte down to the first.
 *
 * An allocation failure is logged and terminates the process, because the
 * function has no way to report an error to its caller.
 *
 * NOTE: the scratch table rle_map has static storage, so this function is
 * not reentrant.
 */
void match_ctx_init(match_ctx ctx,          /* IN/OUT */
                    struct membuf *inbuf,   /* IN */
                    int max_offset)         /* IN */
{
    struct match_node *np;
    struct progress prog[1];

    int buf_len = membuf_memlen(inbuf);
    const unsigned char *buf = membuf_get(inbuf);

    int c, i;

    ctx->info = calloc(buf_len + 1, sizeof(*ctx->info));
    ctx->rle = calloc(buf_len + 1, sizeof(*ctx->rle));
    ctx->rle_r = calloc(buf_len + 1, sizeof(*ctx->rle_r));
    if (ctx->info == NULL || ctx->rle == NULL || ctx->rle_r == NULL) {
        /* every loop below dereferences these arrays; fail loudly instead */
        LOG(LOG_ERROR, ("out of memory while initialising match context\n"));
        exit(EXIT_FAILURE);
    }

    chunkpool_init(ctx->m_pool, sizeof(match));

    ctx->max_offset = max_offset;

    ctx->buf = buf;
    ctx->len = buf_len;

    /*
     * Forward pass: length of the run of identical bytes ending at i.
     * Comparing against buf[i - 1] directly means nothing is read from buf
     * when the buffer is empty.
     */
    for (i = 1; i < buf_len; ++i) {
        if (buf[i] == buf[i - 1]) {
            int len = ctx->rle[i - 1] + 1;
            if (len > 65535) {
                /* the counters are unsigned short, restart the run */
                len = 0;
            }
            ctx->rle[i] = len;
        } else {
            ctx->rle[i] = 0;
        }
    }

    /* Backward pass: how much further the run continues to the right of i. */
    for (i = buf_len - 2; i >= 0; --i) {
        if (ctx->rle[i] < ctx->rle[i + 1]) {
            ctx->rle_r[i] = ctx->rle_r[i + 1] + 1;
        } else {
            ctx->rle_r[i] = 0;
        }
    }

    /* add extra nodes to rle sequences */
    for (c = 0; c < 256; ++c) {
        static char rle_map[65536];
        struct match_node *prev_np;
        unsigned short int rle_len;

        /* for each possible rle char */
        memset(rle_map, 0, sizeof(rle_map));
        prev_np = NULL;
        for (i = 0; i < buf_len; ++i) {
            /* must be the correct char */
            if (buf[i] != c) {
                continue;
            }

            rle_len = ctx->rle[i];
            if (!rle_map[rle_len] && ctx->rle_r[i] > 16) {
                /* no previous lengths and not our primary length */
                continue;
            }

            np = chunkpool_malloc(ctx->m_pool);
            np->index = i;
            np->next = NULL;
            rle_map[rle_len] = 1;

            LOG(LOG_DUMP, ("0) c = %d, added np idx %d -> %d\n", c, i, 0));

            /* if we have a previous entry, let's chain it together */
            if (prev_np != NULL) {
                LOG(LOG_DUMP, ("1) c = %d, pointed np idx %d -> %d\n",
                               c, prev_np->index, i));
                prev_np->next = np;
            }

            ctx->info[i]->single = np;
            prev_np = np;
        }

        /*
         * Second sweep, right to left: give positions that got no node in
         * the first sweep a node pointing at the nearest node to their
         * right, provided their rle_r length has been marked in rle_map.
         */
        memset(rle_map, 0, sizeof(rle_map));
        prev_np = NULL;
        for (i = buf_len - 1; i >= 0; --i) {
            /* must be the correct char */
            if (buf[i] != c) {
                continue;
            }

            rle_len = ctx->rle_r[i];
            np = ctx->info[i]->single;
            if (np == NULL) {
                if (rle_map[rle_len] && prev_np != NULL && rle_len > 0) {
                    np = chunkpool_malloc(ctx->m_pool);
                    np->index = i;
                    np->next = prev_np;
                    ctx->info[i]->single = np;

                    LOG(LOG_DEBUG, ("2) c = %d, added np idx %d -> %d\n",
                                    c, i, prev_np->index));
                }
            } else {
                prev_np = np;
            }

            if (ctx->rle_r[i] > 0) {
                continue;
            }
            /* i is the last byte of its run: record the full run length */
            rle_len = ctx->rle[i] + 1;
            rle_map[rle_len] = 1;
        }
    }

    progress_init(prog, "building.directed.acyclic.graph.", buf_len - 1, 0);

    for (i = buf_len - 1; i >= 0; --i) {
        const_matchp matches;

        /* let's populate the cache */
        matches = matches_calc(ctx, i);

        /* add to cache */
        ctx->info[i]->cache = matches;

        progress_bump(prog, i);
    }

    LOG(LOG_NORMAL, ("\n"));

    progress_free(prog);
}
/*
 * Walk the buffer described by ctx from its end towards its start and, for
 * every position, record the cheapest known way of getting there.
 *
 * ctx   - IN: match context; ctx->len, ctx->rle and ctx->rle_r are read and
 *         the context is handed to matches_get().
 * f     - IN: scoring callback, invoked as f(match, emd, buckets) where
 *         buckets is either NULL or a struct encode_match_buckets to fill
 *         in. Its result is used as the cost of encoding that match.
 * emd   - IN: passed through to f untouched.
 * use_literal_sequences
 *       - IN: when non-zero, plain copies of runs of bytes (offset 0) are
 *         also considered as an alternative at each position.
 *
 * Returns a zero-initialised-then-filled array of ctx->len + 1 search
 * nodes. Entry [ctx->len] is the start node (score 0, no prev); every other
 * reached node links through prev to the node it was reached from.
 * Returns NULL if the array cannot be allocated.
 * NOTE(review): the array comes from calloc(); presumably the caller is
 * expected to free() it - confirm against the call sites.
 */
struct search_node* search_buffer(match_ctx ctx,        /* IN */
                                  encode_match_f * f,   /* IN */
                                  encode_match_data emd,        /* IN */
                                  int use_literal_sequences)
{
    struct progress prog[1];
    struct search_node *sn_arr;
    const_matchp mp = NULL;
    struct search_node *snp;
    struct search_node *best_copy_snp;
    int best_copy_len;

    struct search_node *best_rle_snp;

    int len = ctx->len + 1;

    progress_init(prog, "finding.shortest.path.",len, 0);

    /* one zeroed node per buffer position plus the end-of-buffer node */
    sn_arr = calloc(len, sizeof(struct search_node));
    if (sn_arr == NULL) {
        LOG(LOG_ERROR, ("out of memory while allocating search nodes\n"));
        progress_free(prog);
        return NULL;
    }

    --len;
    snp = &sn_arr[len];
    snp->index = len;
    snp->match->offset = 0;
    snp->match->len = 0;
    snp->total_offset = 0;
    snp->total_score = 0;
    snp->prev = NULL;

    best_copy_snp = snp;
    best_copy_len = 0;

    best_rle_snp = NULL;

    /* think twice about changing this code,
     * it works the way it is. The last time
     * I examined this code I was certain it was
     * broken and broke it myself, trying to fix it. */
    while (len > 0 && (mp = matches_get(ctx, len - 1)) != NULL) {
        float prev_score;
        float prev_offset_sum;

        if (use_literal_sequences) {
            /* check if we can do even better with copy */
            snp = &sn_arr[len];
            if (best_copy_snp->total_score+best_copy_len * 8.0 -
                snp->total_score > 0.0 || best_copy_len > 65535) {
                /* found a better copy endpoint */
                LOG(LOG_DEBUG,
                    ("best copy start moved to index %d\n", snp->index));
                best_copy_snp = snp;
                best_copy_len = 0;
            } else {
                float copy_score = best_copy_len * 8.0 + (1.0 + 17.0 + 17.0);
                float total_copy_score = best_copy_snp->total_score +
                                         copy_score;

                LOG(LOG_DEBUG,
                    ("total score %0.1f, copy total score %0.1f\n",
                     snp->total_score, total_copy_score));

                if (snp->total_score > total_copy_score ) {
                    match local_mp;
                    /* here it is good to just copy instead of crunch */

                    LOG(LOG_DEBUG,
                        ("copy index %d, len %d, total %0.1f, copy %0.1f\n",
                         snp->index, best_copy_len,
                         snp->total_score, total_copy_score));

                    local_mp->len = best_copy_len;
                    local_mp->offset = 0;
                    local_mp->next = NULL;
                    snp->total_score = total_copy_score;
                    snp->total_offset = best_copy_snp->total_offset;
                    snp->prev = best_copy_snp;
                    *snp->match = *local_mp;
                }
            }
            /* end of copy optimization */
        }

        /* check if we can do rle */
        snp = &sn_arr[len];
        if (best_rle_snp == NULL ||
            snp->index + 65535 < best_rle_snp->index ||
            snp->index + ctx->rle_r[snp->index] < best_rle_snp->index) {
            /* best_rle_snp can't be reached by rle from snp, reset it*/
            if (ctx->rle[snp->index] > 0) {
                best_rle_snp = snp;
                LOG(LOG_DEBUG, ("resetting best_rle at index %d, len %d\n",
                                snp->index, ctx->rle[snp->index]));
            } else {
                best_rle_snp = NULL;
            }
        } else if (ctx->rle[snp->index] > 0 &&
                   snp->index + ctx->rle_r[snp->index] >=
                   best_rle_snp->index) {
            float best_rle_score;
            float total_best_rle_score;
            float snp_rle_score;
            float total_snp_rle_score;
            match rle_mp;

            LOG(LOG_DEBUG, ("challenger len %d, index %d, "
                            "ruling len %d, index %d\n",
                            ctx->rle_r[snp->index], snp->index,
                            ctx->rle_r[best_rle_snp->index],
                            best_rle_snp->index));

            /* snp and best_rle_snp is the same rle area,
             * let's see which is best */
            rle_mp->next = NULL;    /* do not hand f an indeterminate link */
            rle_mp->len = ctx->rle[best_rle_snp->index];
            rle_mp->offset = 1;
            best_rle_score = f(rle_mp, emd, NULL);
            total_best_rle_score = best_rle_snp->total_score +
                                   best_rle_score;

            rle_mp->len = ctx->rle[snp->index];
            rle_mp->offset = 1;
            snp_rle_score = f(rle_mp, emd, NULL);
            total_snp_rle_score = snp->total_score + snp_rle_score;

            if (total_snp_rle_score <= total_best_rle_score) {
                /* yes, the snp is a better rle than best_rle_snp */
                LOG(LOG_DEBUG, ("prospect len %d, index %d, (%0.1f+%0.1f) "
                                "ruling len %d, index %d (%0.1f+%0.1f)\n",
                                ctx->rle[snp->index], snp->index,
                                snp->total_score, snp_rle_score,
                                ctx->rle[best_rle_snp->index],
                                best_rle_snp->index,
                                best_rle_snp->total_score, best_rle_score));
                best_rle_snp = snp;
                LOG(LOG_DEBUG, ("setting current best_rle: "
                                "index %d, len %d\n",
                                snp->index, rle_mp->len));
            }
        }

        if (best_rle_snp != NULL && best_rle_snp != snp) {
            float rle_score;
            float total_rle_score;
            /* check if rle is better */
            match local_mp;

            local_mp->len = best_rle_snp->index - snp->index;
            local_mp->offset = 1;
            /* the whole struct is copied into the node below, so the link
             * must hold a defined value, as it does for copies and matches */
            local_mp->next = NULL;

            rle_score = f(local_mp, emd, NULL);
            total_rle_score = best_rle_snp->total_score + rle_score;

            LOG(LOG_DEBUG, ("comparing index %d (%0.1f) with "
                            "rle index %d, len %d, total score %0.1f %0.1f\n",
                            snp->index, snp->total_score,
                            best_rle_snp->index, local_mp->len,
                            best_rle_snp->total_score, rle_score));

            if (snp->total_score > total_rle_score) {
                /*here it is good to do rle instead of crunch */
                LOG(LOG_DEBUG,
                    ("rle index %d, len %d, total %0.1f, rle %0.1f\n",
                     snp->index, local_mp->len,
                     snp->total_score, total_rle_score));

                snp->total_score = total_rle_score;
                snp->total_offset = best_rle_snp->total_offset + 1;
                snp->prev = best_rle_snp;
                *snp->match = *local_mp;
            }
        }
        /* end of rle optimization */

        LOG(LOG_DUMP,
            ("matches for index %d with total score %0.1f\n",
             len - 1, snp->total_score));

        prev_score = sn_arr[len].total_score;
        prev_offset_sum = sn_arr[len].total_offset;

        /* try every match starting at len - 1, at every length down to 1 */
        while (mp != NULL) {
            matchp next;
            int end_len;
            match tmp;
            int bucket_len_start;
            /* always assigned on the first pass of the for loop below,
             * because bucket_len_start starts out as 0 */
            float score = 0.0f;

            next = mp->next;
            end_len = 1;
            *tmp = *mp;
            tmp->next = NULL;
            bucket_len_start = 0;
            for (tmp->len = mp->len; tmp->len >= end_len; --(tmp->len)) {
                float total_score;
                unsigned int total_offset;
                struct encode_match_buckets match_buckets;

                LOG(LOG_DUMP, ("mp[%d, %d], tmp[%d, %d]\n",
                               mp->offset, mp->len,
                               tmp->offset, tmp->len));

                /* only ask f again when the length may have left the
                 * bucket reported by the previous call */
                if (bucket_len_start == 0 ||
                    tmp->len < 3 ||
                    tmp->len < bucket_len_start) {
                    score = f(tmp, emd, &match_buckets);
                    bucket_len_start = match_buckets.len.start;
                }

                total_score = prev_score + score;
                total_offset = prev_offset_sum + tmp->offset;
                snp = &sn_arr[len - tmp->len];

                LOG(LOG_DUMP,
                    ("[%05d] cmp [%05d, %05d score %.1f + %.1f] with %.1f",
                     len, tmp->offset, tmp->len,
                     prev_score, score, snp->total_score));

                if ((total_score < 100000000.0) &&
                    (snp->match->len == 0 ||
                     total_score < snp->total_score ||
                     (total_score == snp->total_score &&
                      (tmp->offset == 0 ||
                       (snp->match->len == tmp->len &&
                        (total_offset <= snp->total_offset)))))) {
                    LOG(LOG_DUMP, (", replaced"));
                    snp->index = len - tmp->len;

                    *snp->match = *tmp;
                    snp->total_offset = total_offset;
                    snp->total_score = total_score;
                    snp->prev = &sn_arr[len];
                }
                LOG(LOG_DUMP, ("\n"));
            }
            LOG(LOG_DUMP, ("tmp->len %d, ctx->rle[%d] %d\n",
                           tmp->len, len - tmp->len,
                           ctx->rle[len - tmp->len]));

            mp = next;
        }

        /* slow way to get to the next node for cur */
        --len;
        ++best_copy_len;
        /* NOTE(review): match is an array embedded in the node (it is
         * written through right after the zero fill above), so this
         * comparison can never be true. Left as is; the intended test for
         * an unreachable node needs to be confirmed before changing it. */
        if (sn_arr[len].match == NULL) {
            LOG(LOG_ERROR, ("Found unreachable node at len %d.\n", len));
        }

        progress_bump(prog, len);
    }
    if (len > 0 && mp == NULL) {
        LOG(LOG_ERROR, ("No matches at len %d.\n", len));
    }

    LOG(LOG_NORMAL, ("\n"));

    progress_free(prog);

    return sn_arr;
}