/*
 * Append "size" bytes taken from "data" to the memory file "mmf".
 *
 * Data goes into the current write block until it is full; a fresh block
 * (at least mmf->bsize bytes, and never smaller than "size") is allocated
 * and linked at the tail whenever there is no write block, the write block
 * is full, or the file is flagged XDL_MMF_ATOMIC and the whole request
 * would not fit in what is left of the write block.
 *
 * Returns "size" when everything was stored, or the number of bytes stored
 * so far when a block allocation fails.
 */
long xdl_write_mmfile(mmfile_t *mmf, void const *data, long size) {
	char const *src = (char const *) data;
	long done = 0, blksize, chunk;
	mmblock_t *blk;

	while (done < size) {
		blk = mmf->wcur;
		if (blk == NULL || blk->size == blk->bsize ||
		    ((mmf->flags & XDL_MMF_ATOMIC) && blk->size + size > blk->bsize)) {
			blksize = XDL_MAX(mmf->bsize, size);
			blk = (mmblock_t *) malloc(sizeof(mmblock_t) + blksize);
			if (blk == NULL) {
				return done;
			}
			blk->size = 0;
			blk->bsize = blksize;
			blk->next = NULL;

			/* Link the new block at the tail and make it current. */
			if (mmf->head == NULL)
				mmf->head = blk;
			if (mmf->tail != NULL)
				mmf->tail->next = blk;
			mmf->tail = blk;
			mmf->wcur = blk;
		}

		/* The payload lives right after the block header. */
		chunk = XDL_MIN(size - done, blk->bsize - blk->size);
		memcpy((char *) (blk + 1) + blk->size, src + done, chunk);

		done += chunk;
		blk->size += chunk;
		mmf->fsize += chunk;
	}

	return size;
}
static int scanA(struct histindex *index, int line1, int count1) { int ptr, tbl_idx; unsigned int chain_len; struct record **rec_chain, *rec; for (ptr = LINE_END(1); line1 <= ptr; ptr--) { tbl_idx = TABLE_HASH(index, 1, ptr); rec_chain = index->records + tbl_idx; rec = *rec_chain; chain_len = 0; while (rec) { if (CMP(index, 1, rec->ptr, 1, ptr)) { /* * ptr is identical to another element. Insert * it onto the front of the existing element * chain. */ NEXT_PTR(index, ptr) = rec->ptr; rec->ptr = ptr; /* cap rec->cnt at MAX_CNT */ rec->cnt = XDL_MIN(MAX_CNT, rec->cnt + 1); LINE_MAP(index, ptr) = rec; goto continue_scan; } rec = rec->next; chain_len++; } if (chain_len == index->max_chain_length) return -1; /* * This is the first time we have ever seen this particular * element in the sequence. Construct a new chain for it. */ if (!(rec = xdl_cha_alloc(&index->rcha))) return -1; rec->ptr = ptr; rec->cnt = 1; rec->next = *rec_chain; *rec_chain = rec; LINE_MAP(index, ptr) = rec; continue_scan: ; /* no op */ } return 0; }
/*
 * Compare the contents of two memory files block by block.
 *
 * Blocks are obtained with xdl_mmfile_first()/xdl_mmfile_next(); the two
 * files may be split into blocks differently, so at each step only the
 * bytes available on both sides are compared with memcmp().
 *
 * Returns 0 when both files run out of data together, the memcmp() result
 * of the first differing span, or, when only one file still has data, the
 * next byte of file 1 (file 2 exhausted) or the negated next byte of
 * file 2 (file 1 exhausted).
 */
int xdl_mmfile_cmp(mmfile_t *mmf1, mmfile_t *mmf2) {
	int diff;
	long len1, len2, left1, left2, span;
	char const *p1, *p2, *end1 = NULL, *end2 = NULL;

	p1 = xdl_mmfile_first(mmf1, &len1);
	if (p1 != NULL)
		end1 = p1 + len1;
	p2 = xdl_mmfile_first(mmf2, &len2);
	if (p2 != NULL)
		end2 = p2 + len2;

	for (;;) {
		/* One side (or both) has no more blocks. */
		if (p1 == NULL)
			return p2 == NULL ? 0 : -*p2;
		if (p2 == NULL)
			return *p1;

		/* Fetch the next block of whichever side is used up. */
		if (p1 >= end1 || p2 >= end2) {
			if (p1 >= end1) {
				p1 = xdl_mmfile_next(mmf1, &len1);
				if (p1 != NULL)
					end1 = p1 + len1;
			}
			if (p2 >= end2) {
				p2 = xdl_mmfile_next(mmf2, &len2);
				if (p2 != NULL)
					end2 = p2 + len2;
			}
			continue;
		}

		left1 = end1 - p1;
		left2 = end2 - p2;
		span = XDL_MIN(left1, left2);
		diff = memcmp(p1, p2, span);
		if (diff != 0)
			return diff;
		p1 += span;
		p2 += span;
	}
}
/*
 * Copy up to "size" bytes from the current read position of "mmf" into
 * "data", advancing the read cursor (mmf->rcur / mmf->rpos) across blocks
 * as they are consumed.
 *
 * Returns the number of bytes actually copied, which is less than "size"
 * when the block list ends first.
 */
long xdl_read_mmfile(mmfile_t *mmf, void *data, long size) {
	char *dst = data;
	long got = 0, chunk;
	mmblock_t *blk = mmf->rcur;

	while (blk != NULL && got < size) {
		if (mmf->rpos >= blk->size) {
			/* Current block drained: step to the next one. */
			blk = blk->next;
			mmf->rcur = blk;
			if (blk == NULL)
				break;
			mmf->rpos = 0;
		}

		/* The payload lives right after the block header. */
		chunk = XDL_MIN(size - got, blk->size - mmf->rpos);
		memcpy(dst + got, (char *) (blk + 1) + mmf->rpos, chunk);

		got += chunk;
		mmf->rpos += chunk;
	}

	return got;
}
/*
 * Trim the leading and trailing records whose hashes match in both files.
 *
 * On return dstart (identical for both files) is the count of matching
 * leading records, and each file's dend is the index of its last record
 * that is not part of the matching tail. The tail scan never runs over
 * records already counted in the matching head. Always returns 0.
 */
static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2) {
	long head, tail, budget;
	xrecord_t **pa, **pb;

	budget = XDL_MIN(xdf1->nrec, xdf2->nrec);

	/* Forward scan from the first record of each file. */
	pa = xdf1->recs;
	pb = xdf2->recs;
	head = 0;
	while (head < budget && (*pa)->ha == (*pb)->ha) {
		head++;
		pa++;
		pb++;
	}
	xdf1->dstart = xdf2->dstart = head;

	/* Backward scan from the last record of each file. */
	budget -= head;
	pa = xdf1->recs + xdf1->nrec - 1;
	pb = xdf2->recs + xdf2->nrec - 1;
	tail = 0;
	while (tail < budget && (*pa)->ha == (*pb)->ha) {
		tail++;
		pa--;
		pb--;
	}
	xdf1->dend = xdf1->nrec - tail - 1;
	xdf2->dend = xdf2->nrec - tail - 1;

	return 0;
}
/*
 * Feed up to "size" bytes, starting at the current read position of
 * "mmf", to the emit callback "ecb", one block-sized piece per call.
 * The read cursor (mmf->rcur / mmf->rpos) advances over what was handed
 * out successfully.
 *
 * Returns the number of bytes passed to the callback; the count stops
 * short of "size" when the block list ends or when ecb->outf() returns a
 * negative value (the piece that failed is not counted).
 */
long xdl_copy_mmfile(mmfile_t *mmf, long size, xdemitcb_t *ecb) {
	long sent = 0, chunk;
	mmblock_t *blk = mmf->rcur;
	mmbuffer_t piece;

	while (blk != NULL && sent < size) {
		if (mmf->rpos >= blk->size) {
			/* Current block drained: step to the next one. */
			blk = blk->next;
			mmf->rcur = blk;
			if (blk == NULL)
				break;
			mmf->rpos = 0;
		}

		chunk = XDL_MIN(size - sent, blk->size - mmf->rpos);
		piece.ptr = blk->ptr + mmf->rpos;
		piece.size = chunk;
		if (ecb->outf(ecb->priv, &piece, 1) < 0)
			break;

		sent += chunk;
		mmf->rpos += chunk;
	}

	return sent;
}
static int try_lcs(struct histindex *index, struct region *lcs, int b_ptr, int line1, int count1, int line2, int count2) { unsigned int b_next = b_ptr + 1; struct record *rec = index->records[TABLE_HASH(index, 2, b_ptr)]; unsigned int as, ae, bs, be, np, rc; int should_break; for (; rec; rec = rec->next) { if (rec->cnt > index->cnt) { if (!index->has_common) index->has_common = CMP(index, 1, rec->ptr, 2, b_ptr); continue; } as = rec->ptr; if (!CMP(index, 1, as, 2, b_ptr)) continue; index->has_common = 1; for (;;) { should_break = 0; np = NEXT_PTR(index, as); bs = b_ptr; ae = as; be = bs; rc = rec->cnt; while (line1 < (int)as && line2 < (int)bs && CMP(index, 1, as - 1, 2, bs - 1)) { as--; bs--; if (1 < rc) rc = XDL_MIN(rc, CNT(index, as)); } while ((int)ae < LINE_END(1) && (int)be < LINE_END(2) && CMP(index, 1, ae + 1, 2, be + 1)) { ae++; be++; if (1 < rc) rc = XDL_MIN(rc, CNT(index, ae)); } if (b_next <= be) b_next = be + 1; if (lcs->end1 - lcs->begin1 < ae - as || rc < index->cnt) { lcs->begin1 = as; lcs->begin2 = bs; lcs->end1 = ae; lcs->end2 = be; index->cnt = rc; } if (np == 0) break; while (np <= ae) { np = NEXT_PTR(index, np); if (np == 0) { should_break = 1; break; } } if (should_break) break; as = np; } } return b_next; }
/*
 * Apply the binary patch stored in "mbfp" to the source described by the
 * "n" offset buffers in "obf", producing a new array of offset buffers
 * that describes the patched output.
 *
 * The patch starts with two little-endian 32 bit values, the Adler-32 and
 * the size of the source, which must match "obf". It is followed by a
 * sequence of operations:
 *
 *   XDL_BDOP_INS   1 byte length, then that many literal bytes
 *   XDL_BDOP_INSB  32 bit length, then that many literal bytes
 *   XDL_BDOP_CPY   32 bit source offset and 32 bit length to copy
 *
 * The resulting segments point into the patch buffer (inserts) or into the
 * source buffers (copies); no payload is duplicated.
 *
 * Every operand is checked against the end of the patch buffer before it
 * is read, so a truncated or malformed patch is rejected instead of being
 * read past its end or producing segments that point outside the buffer.
 *
 * On success returns 0 and stores the array (allocated with xdl_malloc(),
 * to be released by the caller with xdl_free()) in *probf and its length
 * in *pnobf. On any failure returns -1 and leaves *probf / *pnobf
 * untouched.
 */
static int xdl_bmerge(mmoffbuffer_t *obf, int n, mmbuffer_t *mbfp,
		      mmoffbuffer_t **probf, int *pnobf) {
	int i, aobf, nobf;
	long ooff, off, csize;
	unsigned long fp, ofp;
	unsigned char const *data, *top;
	mmoffbuffer_t *robf, *cobf;

	if (mbfp->size < XDL_BPATCH_HDR_SIZE) {
		return -1;
	}
	data = (unsigned char const *) mbfp->ptr;
	top = data + mbfp->size;

	/* The patch must have been generated against this very source. */
	ofp = xdl_mmob_adler32(obf, n);
	XDL_LE32_GET(data, fp);
	data += 4;
	XDL_LE32_GET(data, csize);
	data += 4;
	if (fp != ofp || csize != xdl_mmob_size(obf, n)) {
		return -1;
	}

	aobf = XDL_MOBF_MINALLOC;
	nobf = 0;
	if ((robf = (mmoffbuffer_t *) xdl_malloc(aobf * sizeof(mmoffbuffer_t))) == NULL) {
		return -1;
	}

	for (ooff = 0; data < top;) {
		if (*data == XDL_BDOP_INS) {
			data++;
			/* One length byte followed by the literal data. */
			if (top - data < 1)
				goto fail;
			csize = (long) *data++;
			if (csize > top - data)
				goto fail;
			if ((cobf = xdl_mmob_new(&robf, &nobf, &aobf)) == NULL)
				goto fail;
			cobf->off = ooff;
			cobf->size = csize;
			cobf->ptr = (char *) data;
			data += csize;
			ooff += csize;
		} else if (*data == XDL_BDOP_INSB) {
			data++;
			/* 32 bit length followed by the literal data. */
			if (top - data < 4)
				goto fail;
			XDL_LE32_GET(data, csize);
			data += 4;
			if (csize < 0 || csize > top - data)
				goto fail;
			if ((cobf = xdl_mmob_new(&robf, &nobf, &aobf)) == NULL)
				goto fail;
			cobf->off = ooff;
			cobf->size = csize;
			cobf->ptr = (char *) data;
			data += csize;
			ooff += csize;
		} else if (*data == XDL_BDOP_CPY) {
			data++;
			/* 32 bit source offset plus 32 bit length. */
			if (top - data < 8)
				goto fail;
			XDL_LE32_GET(data, off);
			data += 4;
			XDL_LE32_GET(data, csize);
			data += 4;
			if ((i = xdl_mmob_find_cntr(obf, n, off)) < 0)
				goto fail;

			/*
			 * The copied range may span several source buffers;
			 * emit one segment per buffer it touches.
			 */
			off -= obf[i].off;
			for (; i < n && csize > 0; i++, off = 0) {
				if ((cobf = xdl_mmob_new(&robf, &nobf, &aobf)) == NULL)
					goto fail;
				cobf->off = ooff;
				cobf->size = XDL_MIN(csize, obf[i].size - off);
				cobf->ptr = obf[i].ptr + off;
				ooff += cobf->size;
				csize -= cobf->size;
			}
			/* The copy ran past the end of the source. */
			if (csize > 0)
				goto fail;
		} else {
			/* Unknown operation code. */
			goto fail;
		}
	}
	*probf = robf;
	*pnobf = nobf;

	return 0;

fail:
	xdl_free(robf);
	return -1;
}
/*
 * Emit the change script "xscr" as a sequence of hunks through "ecb".
 *
 * When XDL_EMIT_COMMON is set in xecfg->flags the whole job is delegated
 * to xdl_emit_common(). Otherwise every iteration of the outer loop takes
 * the run of changes [xch, xche] returned by xdl_get_hunk(), computes the
 * hunk boundaries (s1/e1 for file 1, s2/e2 for file 2), emits the hunk
 * header and then the pre-context, the change atoms and the post-context.
 * All context lines are taken from xe->xdf2.
 *
 * With XDL_EMIT_FUNCCONTEXT the boundaries are widened using the values
 * returned by get_func_line() (presumably the enclosing function's start
 * and the start of the next one; get_func_line() is not visible here,
 * confirm against its definition), and following changes are folded into
 * the hunk for as long as they overlap the widened range.
 *
 * Returns 0 on success and -1 as soon as xdl_emit_hunk_hdr() or
 * xdl_emit_record() reports a failure; the result of xdl_emit_common() is
 * returned unchanged.
 */
int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
		  xdemitconf_t const *xecfg) {
	long s1, s2, e1, e2, lctx;
	xdchange_t *xch, *xche;
	long funclineprev = -1;
	struct func_line func_line = { 0 };

	if (xecfg->flags & XDL_EMIT_COMMON)
		return xdl_emit_common(xe, xscr, ecb, xecfg);

	for (xch = xscr; xch; xch = xche->next) {
		xche = xdl_get_hunk(xch, xecfg);

		/* Hunk start: ctxlen lines before the first change, not below 0. */
		s1 = XDL_MAX(xch->i1 - xecfg->ctxlen, 0);
		s2 = XDL_MAX(xch->i2 - xecfg->ctxlen, 0);

		if (xecfg->flags & XDL_EMIT_FUNCCONTEXT) {
			long fs1 = get_func_line(xe, xecfg, NULL, xch->i1, -1);
			if (fs1 < 0)
				fs1 = 0;
			/* Pull both starts back by the same number of lines. */
			if (fs1 < s1) {
				s2 -= s1 - fs1;
				s1 = fs1;
			}
		}

 again:
		/*
		 * Trailing context: at most ctxlen lines, limited by what is
		 * left after the last change of the hunk in either file.
		 */
		lctx = xecfg->ctxlen;
		lctx = XDL_MIN(lctx, xe->xdf1.nrec - (xche->i1 + xche->chg1));
		lctx = XDL_MIN(lctx, xe->xdf2.nrec - (xche->i2 + xche->chg2));

		e1 = xche->i1 + xche->chg1 + lctx;
		e2 = xche->i2 + xche->chg2 + lctx;

		if (xecfg->flags & XDL_EMIT_FUNCCONTEXT) {
			long fe1 = get_func_line(xe, xecfg, NULL,
						 xche->i1 + xche->chg1,
						 xe->xdf1.nrec);
			/* Nothing found: extend up to the end of file 1. */
			if (fe1 < 0)
				fe1 = xe->xdf1.nrec;
			/* Push both ends forward by the same number of lines. */
			if (fe1 > e1) {
				e2 += fe1 - e1;
				e1 = fe1;
			}

			/*
			 * Overlap with next change?  Then include it
			 * in the current hunk and start over to find
			 * its new end.
			 */
			if (xche->next) {
				long l = xche->next->i1;
				if (l <= e1 ||
				    get_func_line(xe, xecfg, NULL, l, e1) < 0) {
					xche = xche->next;
					goto again;
				}
			}
		}

		/*
		 * Emit current hunk header.
		 */
		if (xecfg->flags & XDL_EMIT_FUNCNAMES) {
			/*
			 * funclineprev carries the previous hunk's starting
			 * point into the next lookup.
			 */
			get_func_line(xe, xecfg, &func_line,
				      s1 - 1, funclineprev);
			funclineprev = s1 - 1;
		}
		/* Starting lines in the header are 1-based, hence the "+ 1". */
		if (xdl_emit_hunk_hdr(s1 + 1, e1 - s1, s2 + 1, e2 - s2,
				      func_line.buf, func_line.len, ecb) < 0)
			return -1;

		/*
		 * Emit pre-context.
		 */
		for (; s2 < xch->i2; s2++)
			if (xdl_emit_record(&xe->xdf2, s2, " ", ecb) < 0)
				return -1;

		/* Walk the change atoms xch .. xche that make up this hunk. */
		for (s1 = xch->i1, s2 = xch->i2;; xch = xch->next) {
			/*
			 * Merge previous with current change atom: emit the
			 * unchanged lines lying between the two.
			 */
			for (; s1 < xch->i1 && s2 < xch->i2; s1++, s2++)
				if (xdl_emit_record(&xe->xdf2, s2, " ", ecb) < 0)
					return -1;

			/*
			 * Removes lines from the first file.
			 */
			for (s1 = xch->i1; s1 < xch->i1 + xch->chg1; s1++)
				if (xdl_emit_record(&xe->xdf1, s1, "-", ecb) < 0)
					return -1;

			/*
			 * Adds lines from the second file.
			 */
			for (s2 = xch->i2; s2 < xch->i2 + xch->chg2; s2++)
				if (xdl_emit_record(&xe->xdf2, s2, "+", ecb) < 0)
					return -1;

			if (xch == xche)
				break;
			/* Resume right after the atom that was just emitted. */
			s1 = xch->i1 + xch->chg1;
			s2 = xch->i2 + xch->chg2;
		}

		/*
		 * Emit post-context.
		 */
		for (s2 = xche->i2 + xche->chg2; s2 < e2; s2++)
			if (xdl_emit_record(&xe->xdf2, s2, " ", ecb) < 0)
				return -1;
	}

	return 0;
}
/*
 * Emit the change script "xscr" as a sequence of hunks through "ecb".
 *
 * When XDL_EMIT_COMMON is set in xecfg->flags the whole job is delegated
 * to xdl_emit_common(). Otherwise the changes are grouped into hunks with
 * xdl_get_hunk(); for each hunk the header is emitted (with the text found
 * by xdl_find_func() when XDL_EMIT_FUNCNAMES is set), followed by up to
 * xecfg->ctxlen lines of leading context, the change atoms and the
 * trailing context. All context lines are taken from xe->xdf1.
 *
 * Returns 0 on success and -1 as soon as xdl_emit_hunk_hdr() or
 * xdl_emit_record() reports a failure; the result of xdl_emit_common() is
 * returned unchanged.
 */
int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
		  xdemitconf_t const *xecfg) {
	long s1, s2, e1, e2, lctx;
	xdchange_t *hunk, *last, *atom;
	char funcbuf[80];
	long funclen = 0;
	find_func_t ff;

	if (xecfg->flags & XDL_EMIT_COMMON)
		return xdl_emit_common(xe, xscr, ecb, xecfg);

	ff = xecfg->find_func ? xecfg->find_func : def_ff;

	hunk = xscr;
	while (hunk != NULL) {
		last = xdl_get_hunk(hunk, xecfg);

		/* Hunk start: ctxlen lines before the first change, not below 0. */
		s1 = XDL_MAX(hunk->i1 - xecfg->ctxlen, 0);
		s2 = XDL_MAX(hunk->i2 - xecfg->ctxlen, 0);

		/*
		 * Trailing context: at most ctxlen lines, limited by what is
		 * left after the last change of the hunk in either file.
		 */
		lctx = xecfg->ctxlen;
		lctx = XDL_MIN(lctx, xe->xdf1.nrec - (last->i1 + last->chg1));
		lctx = XDL_MIN(lctx, xe->xdf2.nrec - (last->i2 + last->chg2));

		e1 = last->i1 + last->chg1 + lctx;
		e2 = last->i2 + last->chg2 + lctx;

		/* Hunk header; starting lines are 1-based, hence the "+ 1". */
		if (xecfg->flags & XDL_EMIT_FUNCNAMES) {
			xdl_find_func(&xe->xdf1, s1, funcbuf,
				      sizeof(funcbuf), &funclen,
				      ff, xecfg->find_func_priv);
		}
		if (xdl_emit_hunk_hdr(s1 + 1, e1 - s1, s2 + 1, e2 - s2,
				      funcbuf, funclen, ecb) < 0)
			return -1;

		/* Leading context. */
		while (s1 < hunk->i1) {
			if (xdl_emit_record(&xe->xdf1, s1, " ", ecb) < 0)
				return -1;
			s1++;
		}

		/* Change atoms hunk .. last, with the common lines between. */
		s1 = hunk->i1;
		s2 = hunk->i2;
		for (atom = hunk;; atom = atom->next) {
			/* Unchanged lines separating this atom from the previous. */
			while (s1 < atom->i1 && s2 < atom->i2) {
				if (xdl_emit_record(&xe->xdf1, s1, " ", ecb) < 0)
					return -1;
				s1++;
				s2++;
			}

			/* Lines removed from the first file. */
			for (s1 = atom->i1; s1 < atom->i1 + atom->chg1; s1++) {
				if (xdl_emit_record(&xe->xdf1, s1, "-", ecb) < 0)
					return -1;
			}

			/* Lines added by the second file. */
			for (s2 = atom->i2; s2 < atom->i2 + atom->chg2; s2++) {
				if (xdl_emit_record(&xe->xdf2, s2, "+", ecb) < 0)
					return -1;
			}

			if (atom == last)
				break;
			s1 = atom->i1 + atom->chg1;
			s2 = atom->i2 + atom->chg2;
		}

		/* Trailing context. */
		for (s1 = last->i1 + last->chg1; s1 < e1; s1++) {
			if (xdl_emit_record(&xe->xdf1, s1, " ", ecb) < 0)
				return -1;
		}

		hunk = last->next;
	}

	return 0;
}
/*
 * See "An O(ND) Difference Algorithm and its Variations", by Eugene Myers.
 * Basically considers a "box" (off1, off2, lim1, lim2) and scans both the
 * forward diagonals starting from (off1, off2) and the backward diagonals
 * starting from (lim1, lim2). If the K values on the same diagonal cross,
 * returns the furthest point of reach. Running the algorithm to completion
 * may be too expensive in some cases, so a little bit of heuristic is
 * needed to cut the search short and return a suboptimal point.
 *
 * ha1/ha2 are the per-line hash arrays of the two files. kvdf and kvdb are
 * indexed by diagonal number d = i1 - i2 and hold, for each diagonal, the
 * file 1 index reached by the forward and by the backward scan; the code
 * touches indexes from (off1 - lim2) - 1 to (lim1 - off2) + 1. When
 * need_min is set the heuristics and the cost cut-off are skipped and only
 * a real crossing ends the search.
 *
 * The split point is stored in spl->i1 / spl->i2; spl->min_lo and
 * spl->min_hi are both set to 1 for a real crossing, while only one of
 * them is set when the point comes from a heuristic or from the cost
 * cut-off. Returns the value of the cost counter "ec" at the time the
 * point was chosen.
 */
static long xdl_split(unsigned long const *ha1, long off1, long lim1,
		      unsigned long const *ha2, long off2, long lim2,
		      long *kvdf, long *kvdb, int need_min, xdpsplit_t *spl,
		      xdalgoenv_t *xenv) {
	long dmin = off1 - lim2, dmax = lim1 - off2;
	long fmid = off1 - off2, bmid = lim1 - lim2;
	long odd = (fmid - bmid) & 1;
	long fmin = fmid, fmax = fmid;
	long bmin = bmid, bmax = bmid;
	long ec, d, i1, i2, prev1, best, dd, v, k;

	/*
	 * Set initial diagonal values for both forward and backward path.
	 */
	kvdf[fmid] = off1;
	kvdb[bmid] = lim1;

	for (ec = 1;; ec++) {
		int got_snake = 0;

		/*
		 * We need to extend the diagonal "domain" by one. If the next
		 * value exits the box boundaries we need to move it in the
		 * opposite direction instead, so that (max - min) stays even:
		 * the loop below steps over the diagonals two at a time.
		 * Also we initialize the external K value to -1 so that we can
		 * avoid extra condition checks inside the core loop.
		 */
		if (fmin > dmin)
			kvdf[--fmin - 1] = -1;
		else
			++fmin;
		if (fmax < dmax)
			kvdf[++fmax + 1] = -1;
		else
			--fmax;

		for (d = fmax; d >= fmin; d -= 2) {
			/* Start from the neighbour diagonal that got further. */
			if (kvdf[d - 1] >= kvdf[d + 1])
				i1 = kvdf[d - 1] + 1;
			else
				i1 = kvdf[d + 1];
			prev1 = i1;
			i2 = i1 - d;
			/* Follow the snake: consecutive lines with equal hashes. */
			for (; i1 < lim1 && i2 < lim2 && ha1[i1] == ha2[i2]; i1++, i2++);
			if (i1 - prev1 > xenv->snake_cnt)
				got_snake = 1;
			kvdf[d] = i1;
			/* The forward path met the backward one on diagonal d. */
			if (odd && bmin <= d && d <= bmax && kvdb[d] <= i1) {
				spl->i1 = i1;
				spl->i2 = i2;
				spl->min_lo = spl->min_hi = 1;
				return ec;
			}
		}

		/*
		 * We need to extend the diagonal "domain" by one. If the next
		 * value exits the box boundaries we need to move it in the
		 * opposite direction instead, so that (max - min) stays even:
		 * the loop below steps over the diagonals two at a time.
		 * Also we initialize the external K value to XDL_LINE_MAX so
		 * that we can avoid extra condition checks inside the core
		 * loop.
		 */
		if (bmin > dmin)
			kvdb[--bmin - 1] = XDL_LINE_MAX;
		else
			++bmin;
		if (bmax < dmax)
			kvdb[++bmax + 1] = XDL_LINE_MAX;
		else
			--bmax;

		for (d = bmax; d >= bmin; d -= 2) {
			/* Start from the neighbour diagonal that got further back. */
			if (kvdb[d - 1] < kvdb[d + 1])
				i1 = kvdb[d - 1];
			else
				i1 = kvdb[d + 1] - 1;
			prev1 = i1;
			i2 = i1 - d;
			/* Follow the snake backwards. */
			for (; i1 > off1 && i2 > off2 && ha1[i1 - 1] == ha2[i2 - 1]; i1--, i2--);
			if (prev1 - i1 > xenv->snake_cnt)
				got_snake = 1;
			kvdb[d] = i1;
			/* The backward path met the forward one on diagonal d. */
			if (!odd && fmin <= d && d <= fmax && i1 <= kvdf[d]) {
				spl->i1 = i1;
				spl->i2 = i2;
				spl->min_lo = spl->min_hi = 1;
				return ec;
			}
		}

		/* A minimal split was requested: no shortcuts allowed. */
		if (need_min)
			continue;

		/*
		 * If the edit cost is above the heuristic trigger and if
		 * we got a good snake, we sample current diagonals to see
		 * if some of them have reached an "interesting" path. Our
		 * measure is a function of the distance from the diagonal
		 * corner (i1 + i2) penalized with the distance from the
		 * mid diagonal itself. If this value is above the current
		 * edit cost times a magic factor (XDL_K_HEUR) we consider
		 * it interesting.
		 */
		if (got_snake && ec > xenv->heur_min) {
			for (best = 0, d = fmax; d >= fmin; d -= 2) {
				dd = d > fmid ? d - fmid: fmid - d;
				i1 = kvdf[d];
				i2 = i1 - d;
				v = (i1 - off1) + (i2 - off2) - dd;

				if (v > XDL_K_HEUR * ec && v > best &&
				    off1 + xenv->snake_cnt <= i1 && i1 < lim1 &&
				    off2 + xenv->snake_cnt <= i2 && i2 < lim2) {
					/*
					 * Accept the point only if the snake_cnt
					 * lines just before it all match.
					 */
					for (k = 1; ha1[i1 - k] == ha2[i2 - k]; k++)
						if (k == xenv->snake_cnt) {
							best = v;
							spl->i1 = i1;
							spl->i2 = i2;
							break;
						}
				}
			}
			if (best > 0) {
				spl->min_lo = 1;
				spl->min_hi = 0;
				return ec;
			}

			for (best = 0, d = bmax; d >= bmin; d -= 2) {
				dd = d > bmid ? d - bmid: bmid - d;
				i1 = kvdb[d];
				i2 = i1 - d;
				v = (lim1 - i1) + (lim2 - i2) - dd;

				if (v > XDL_K_HEUR * ec && v > best &&
				    off1 < i1 && i1 <= lim1 - xenv->snake_cnt &&
				    off2 < i2 && i2 <= lim2 - xenv->snake_cnt) {
					/*
					 * Accept the point only if the snake_cnt
					 * lines starting at it all match.
					 */
					for (k = 0; ha1[i1 + k] == ha2[i2 + k]; k++)
						if (k == xenv->snake_cnt - 1) {
							best = v;
							spl->i1 = i1;
							spl->i2 = i2;
							break;
						}
				}
			}
			if (best > 0) {
				spl->min_lo = 0;
				spl->min_hi = 1;
				return ec;
			}
		}

		/*
		 * Enough is enough. We spent too much time here and now we collect
		 * the furthest reaching path using the (i1 + i2) measure.
		 */
		if (ec >= xenv->mxcost) {
			long fbest, fbest1, bbest, bbest1;

			fbest = fbest1 = -1;
			for (d = fmax; d >= fmin; d -= 2) {
				/* Clamp the forward point into the box. */
				i1 = XDL_MIN(kvdf[d], lim1);
				i2 = i1 - d;
				if (lim2 < i2)
					i1 = lim2 + d, i2 = lim2;
				if (fbest < i1 + i2) {
					fbest = i1 + i2;
					fbest1 = i1;
				}
			}

			bbest = bbest1 = XDL_LINE_MAX;
			for (d = bmax; d >= bmin; d -= 2) {
				/* Clamp the backward point into the box. */
				i1 = XDL_MAX(off1, kvdb[d]);
				i2 = i1 - d;
				if (i2 < off2)
					i1 = off2 + d, i2 = off2;
				if (i1 + i2 < bbest) {
					bbest = i1 + i2;
					bbest1 = i1;
				}
			}

			/* Keep whichever direction travelled further from its corner. */
			if ((lim1 + lim2) - bbest < fbest - (off1 + off2)) {
				spl->i1 = fbest1;
				spl->i2 = fbest - fbest1;
				spl->min_lo = 1;
				spl->min_hi = 0;
			} else {
				spl->i1 = bbest1;
				spl->i2 = bbest - bbest1;
				spl->min_lo = 0;
				spl->min_hi = 1;
			}
			return ec;
		}
	}
}