int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, xdfenv_t *xe) { long ndiags; long *kvd, *kvdf, *kvdb; xdalgoenv_t xenv; diffdata_t dd1, dd2; if (XDF_DIFF_ALG(xpp->flags) == XDF_PATIENCE_DIFF) return xdl_do_patience_diff(mf1, mf2, xpp, xe); if (XDF_DIFF_ALG(xpp->flags) == XDF_HISTOGRAM_DIFF) return xdl_do_histogram_diff(mf1, mf2, xpp, xe); if (xdl_prepare_env(mf1, mf2, xpp, xe) < 0) { return -1; } /* * Allocate and setup K vectors to be used by the differential algorithm. * One is to store the forward path and one to store the backward path. */ ndiags = xe->xdf1.nreff + xe->xdf2.nreff + 3; if (!(kvd = (long *) xdl_malloc((2 * ndiags + 2) * sizeof(long)))) { xdl_free_env(xe); return -1; } kvdf = kvd; kvdb = kvdf + ndiags; kvdf += xe->xdf2.nreff + 1; kvdb += xe->xdf2.nreff + 1; xenv.mxcost = xdl_bogosqrt(ndiags); if (xenv.mxcost < XDL_MAX_COST_MIN) xenv.mxcost = XDL_MAX_COST_MIN; xenv.snake_cnt = XDL_SNAKE_CNT; xenv.heur_min = XDL_HEUR_MIN_COST; dd1.nrec = xe->xdf1.nreff; dd1.ha = xe->xdf1.ha; dd1.rchg = xe->xdf1.rchg; dd1.rindex = xe->xdf1.rindex; dd2.nrec = xe->xdf2.nreff; dd2.ha = xe->xdf2.ha; dd2.rchg = xe->xdf2.rchg; dd2.rindex = xe->xdf2.rindex; if (xdl_recs_cmp(&dd1, 0, dd1.nrec, &dd2, 0, dd2.nrec, kvdf, kvdb, (xpp->flags & XDF_NEED_MINIMAL) != 0, &xenv) < 0) { xdl_free(kvd); xdl_free_env(xe); return -1; } xdl_free(kvd); return 0; }
/* * Try to reduce the problem complexity, discard records that have no * matches on the other file. Also, lines that have multiple matches * might be potentially discarded if they happear in a run of discardable. */ static int xdl_cleanup_records(xdfile_t *xdf1, xdfile_t *xdf2) { long i, nm, rhi, nreff, mlim; unsigned long hav; xrecord_t **recs; xrecord_t *rec; char *dis, *dis1, *dis2; if (!(dis = (char *) xdl_malloc(xdf1->nrec + xdf2->nrec + 2))) { return -1; } memset(dis, 0, xdf1->nrec + xdf2->nrec + 2); dis1 = dis; dis2 = dis1 + xdf1->nrec + 1; if ((mlim = xdl_bogosqrt(xdf1->nrec)) > XDL_MAX_EQLIMIT) mlim = XDL_MAX_EQLIMIT; for (i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; i <= xdf1->dend; i++, recs++) { hav = (*recs)->ha; rhi = (long) XDL_HASHLONG(hav, xdf2->hbits); for (nm = 0, rec = xdf2->rhash[rhi]; rec; rec = rec->next) if (rec->ha == hav && ++nm == mlim) break; dis1[i] = (nm == 0) ? 0: (nm >= mlim) ? 2: 1; } if ((mlim = xdl_bogosqrt(xdf2->nrec)) > XDL_MAX_EQLIMIT) mlim = XDL_MAX_EQLIMIT; for (i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart]; i <= xdf2->dend; i++, recs++) { hav = (*recs)->ha; rhi = (long) XDL_HASHLONG(hav, xdf1->hbits); for (nm = 0, rec = xdf1->rhash[rhi]; rec; rec = rec->next) if (rec->ha == hav && ++nm == mlim) break; dis2[i] = (nm == 0) ? 0: (nm >= mlim) ? 2: 1; } for (nreff = 0, i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; i <= xdf1->dend; i++, recs++) { if (dis1[i] == 1 || (dis1[i] == 2 && !xdl_clean_mmatch(dis1, i, xdf1->dstart, xdf1->dend))) { xdf1->rindex[nreff] = i; xdf1->ha[nreff] = (*recs)->ha; nreff++; } else xdf1->rchg[i] = 1; } xdf1->nreff = nreff; for (nreff = 0, i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart]; i <= xdf2->dend; i++, recs++) { if (dis2[i] == 1 || (dis2[i] == 2 && !xdl_clean_mmatch(dis2, i, xdf2->dstart, xdf2->dend))) { xdf2->rindex[nreff] = i; xdf2->ha[nreff] = (*recs)->ha; nreff++; } else xdf2->rchg[i] = 1; } xdf2->nreff = nreff; xdl_free(dis); return 0; }