/*
 * Locate the block of `mmf` that contains absolute byte offset `off`.
 *
 * Blocks are visited in order with xdl_mmfile_first()/xdl_mmfile_next(); the
 * size of every block that lies entirely before `off` is subtracted from it.
 * When the remaining offset falls inside the current block it is stored in
 * mmf->rpos.
 *
 * Returns 0 on success, -1 when `off` is negative, the file has no blocks,
 * or `off` lies at or beyond the end of the data.
 */
int xdl_seek_mmfile(mmfile_t *mmf, long off) {
	long bsize;

	/* A negative offset can never be inside a block. */
	if (off < 0)
		return -1;

	if (xdl_mmfile_first(mmf, &bsize)) {
		do {
			if (off < bsize) {
				mmf->rpos = off;
				return 0;
			}
			off -= bsize;
			/*
			 * Step to the NEXT block. Restarting from the first block
			 * here would keep re-testing the same block and end up
			 * seeking to (off modulo first-block-size).
			 */
		} while (xdl_mmfile_next(mmf, &bsize));
	}

	return -1;
}
/*
 * Estimate the number of lines in `mf`.
 *
 * Up to XDL_GUESS_NLINES lines are scanned (crossing block boundaries as
 * needed) to measure how many bytes they occupy; the total file size is then
 * divided by the average sampled line length. The result is always at
 * least 1.
 */
long xdl_guess_lines(mmfile_t *mf) {
	long lines = 0, blksz, sampled = 0;
	char const *base, *pos, *end, *nlp;

	base = xdl_mmfile_first(mf, &blksz);
	pos = base;
	if (base != NULL) {
		end = base + blksz;
		while (lines < XDL_GUESS_NLINES) {
			if (pos >= end) {
				/* Current block exhausted: account for it and move on. */
				sampled += (long) (pos - base);
				base = xdl_mmfile_next(mf, &blksz);
				pos = base;
				if (base == NULL)
					break;
				end = base + blksz;
			}
			lines++;
			/* Jump past the next newline, or to the block end if none. */
			nlp = memchr(pos, '\n', end - pos);
			pos = nlp ? nlp + 1 : end;
		}
		/* Bytes consumed in the block where sampling stopped. */
		sampled += (long) (pos - base);
	}

	if (lines != 0 && sampled != 0)
		lines = xdl_mmfile_size(mf) / (sampled / lines);

	return lines + 1;
}
/*
 * Return the total number of bytes stored in `mf`, computed by summing the
 * size of every block reported by xdl_mmfile_first()/xdl_mmfile_next().
 */
static long xdlt_mmfile_size(mmfile_t *mf) {
	long total = 0, blklen;
	char const *chunk = xdl_mmfile_first(mf, &blklen);

	while (chunk != NULL) {
		total += blklen;
		chunk = xdl_mmfile_next(mf, &blklen);
	}

	return total;
}
/*
 * Copy every block of `mf`, back to back, into the buffer obtained from
 * String_val(data).
 *
 * The destination must already be large enough for the whole file; no bounds
 * check is performed here. String_val is defined outside this block
 * (presumably the OCaml runtime accessor -- confirm).
 *
 * Returns the number of bytes copied (a long narrowed to int).
 */
static int xdlt_read_mmfile(char *data, mmfile_t *mf) {
	long copied = 0, blklen;
	char const *chunk = xdl_mmfile_first(mf, &blklen);

	while (chunk != NULL) {
		memcpy(String_val(data) + copied, chunk, blklen);
		copied += blklen;
		chunk = xdl_mmfile_next(mf, &blklen);
	}

	return copied;
}
/*
 * Compare the contents of two memory files byte by byte, independently of
 * how each one is split into blocks.
 *
 * Returns 0 when both files hold the same bytes. When a differing run is
 * found the memcmp() result for that run is returned. When one file ends
 * first, the result is derived from the first remaining byte of the other
 * file: `*cur` if mmf1 has data left, `-*cur` if mmf2 has data left.
 */
int xdl_mmfile_cmp(mmfile_t *mmf1, mmfile_t *mmf2) {
	int diff;
	long span, blklen1, blklen2, left1, left2;
	char const *pos1, *end1 = NULL;
	char const *pos2, *end2 = NULL;

	pos1 = xdl_mmfile_first(mmf1, &blklen1);
	if (pos1 != NULL)
		end1 = pos1 + blklen1;
	pos2 = xdl_mmfile_first(mmf2, &blklen2);
	if (pos2 != NULL)
		end2 = pos2 + blklen2;

	/* Handle files that have no blocks at all. */
	if (pos1 == NULL) {
		if (pos2 == NULL)
			return 0;
		return -*pos2;
	}
	if (pos2 == NULL)
		return *pos1;

	for (;;) {
		/* Refill whichever side has consumed its current block. */
		if (pos1 >= end1) {
			pos1 = xdl_mmfile_next(mmf1, &blklen1);
			if (pos1 != NULL)
				end1 = pos1 + blklen1;
		}
		if (pos2 >= end2) {
			pos2 = xdl_mmfile_next(mmf2, &blklen2);
			if (pos2 != NULL)
				end2 = pos2 + blklen2;
		}

		if (pos1 == NULL) {
			if (pos2 == NULL)
				return 0;
			return -*pos2;
		}
		if (pos2 == NULL)
			return *pos1;

		/* Compare the longest run available in both current blocks. */
		left1 = end1 - pos1;
		left2 = end2 - pos2;
		span = XDL_MIN(left1, left2);
		diff = memcmp(pos1, pos2, span);
		if (diff != 0)
			return diff;
		pos1 += span;
		pos2 += span;
	}
}
/*
 * Write every block of `mf` to the stdio stream `fp`, followed by a summary
 * line giving the number of blocks and the total byte count.
 *
 * fwrite() results are not checked. Returns the total number of data bytes
 * handed to fwrite() (a long narrowed to int).
 */
static int xdlt_write_mmfile(FILE *fp, mmfile_t *mf) {
	long total = 0, count = 0, blklen;
	char const *chunk = xdl_mmfile_first(mf, &blklen);

	while (chunk != NULL) {
		fwrite(chunk, blklen, 1, fp);
		total += blklen;
		count++;
		chunk = xdl_mmfile_next(mf, &blklen);
	}
	fprintf(fp, "==> %ld blks, %ld size\n", count, total);

	return total;
}
/*
 * Build in `mfr` a randomly altered copy of the text held in `mfo`.
 *
 * `mfr` is initialised here (XDLT_STD_BLKSIZE, XDL_MMF_ATOMIC). The source is
 * walked line by line; for each line, unless a skip run is in progress:
 *   - if DBL_RAND() < rmod, a change starts: the current line is dropped and
 *     a run length `rand() % chmax` is drawn. With probability 1/2 that many
 *     lines produced by xdlt_gen_line() are written instead; otherwise that
 *     many following source lines are skipped;
 *   - otherwise the line is copied verbatim.
 *
 * Returns 0 on success. On failure returns -1; if the failure happened after
 * `mfr` was initialised, `mfr` is released with xdl_free_mmfile().
 */
int xdlt_change_file(mmfile_t *mfo, mmfile_t *mfr, double rmod, int chmax) {
	long skipln, lnsize, bsize;
	char const *blk, *cur, *top, *eol, *next;
	char lnbuf[XDLT_MAX_LINE_SIZE + 1];

	if (xdl_init_mmfile(mfr, XDLT_STD_BLKSIZE, XDL_MMF_ATOMIC) < 0) {
		return -1;
	}
	if ((blk = xdl_mmfile_first(mfo, &bsize)) != NULL) {
		for (cur = blk, top = blk + bsize, skipln = 0;;) {
			if (cur >= top) {
				if ((blk = xdl_mmfile_next(mfo, &bsize)) == NULL)
					break;
				cur = blk;
				top = blk + bsize;
			}
			/*
			 * `next` is one past the end of the current line. When the
			 * block does not end with a newline the line stops at `top`;
			 * it must not be extended by one byte, which would read
			 * beyond the block.
			 */
			eol = memchr(cur, '\n', top - cur);
			next = eol != NULL ? eol + 1 : top;
			if (!skipln) {
				if (DBL_RAND() < rmod) {
					/* Guard the modulo: chmax == 0 would divide by zero. */
					skipln = chmax != 0 ? rand() % chmax : 0;
					if (rand() & 1) {
						/* Replace with generated lines; consumes skipln. */
						for (; skipln > 0; skipln--) {
							lnsize = xdlt_gen_line(lnbuf, XDLT_MAX_LINE_SIZE);
							if (xdl_write_mmfile(mfr, lnbuf, lnsize) != lnsize) {
								xdl_free_mmfile(mfr);
								return -1;
							}
						}
					}
				} else {
					lnsize = (long) (next - cur);
					if (xdl_write_mmfile(mfr, cur, lnsize) != lnsize) {
						xdl_free_mmfile(mfr);
						return -1;
					}
				}
			} else
				skipln--;
			cur = next;
		}
	}

	return 0;
}
/*
 * Split the memory file `mf` into line records and store them in `rf`.
 *
 * The record array starts with the size suggested by xdl_guess_lines() and
 * doubles whenever it fills up. Each record covers one line including its
 * trailing newline, or the rest of the block when no newline is found.
 *
 * When `ispatch` is non-zero, a line starting with a backslash is not stored
 * as a record; instead, if the previous record ends with a newline, that
 * newline is removed from the previous record's size.
 *
 * Returns 0 on success (rf->mf, rf->nrec and rf->recs are set), -1 when an
 * allocation fails.
 */
static int xdl_init_recfile(mmfile_t *mf, int ispatch, recfile_t *rf) {
	long cap, count = 0, blklen;
	int strip;
	recinfo_t *recs, *grown, *last;
	char const *pos, *end, *nlp, *next;

	cap = xdl_guess_lines(mf);
	recs = (recinfo_t *) xdl_malloc(cap * sizeof(recinfo_t));
	if (recs == NULL)
		return -1;

	pos = (char const *) xdl_mmfile_first(mf, &blklen);
	if (pos != NULL) {
		end = pos + blklen;
		for (;;) {
			if (pos >= end) {
				pos = (char const *) xdl_mmfile_next(mf, &blklen);
				if (pos == NULL)
					break;
				end = pos + blklen;
			}
			if (count >= cap) {
				cap *= 2;
				grown = (recinfo_t *) xdl_realloc(recs, cap * sizeof(recinfo_t));
				if (grown == NULL) {
					xdl_free(recs);
					return -1;
				}
				recs = grown;
			}

			/* One past the line end: after the newline, or the block end. */
			nlp = (char *) memchr(pos, '\n', end - pos);
			next = nlp != NULL ? nlp + 1 : end;
			recs[count].ptr = pos;
			recs[count].size = (long) (next - pos);

			strip = 0;
			if (ispatch && *pos == '\\' && count > 0) {
				last = &recs[count - 1];
				strip = last->size > 0 && last->ptr[last->size - 1] == '\n';
			}
			if (strip)
				recs[count - 1].size--;
			else
				count++;

			pos = next;
		}
	}

	rf->mf = mf;
	rf->nrec = count;
	rf->recs = recs;

	return 0;
}
/*
 * Estimate the number of lines in `mf` from a sample of its first block.
 *
 * At most `sample` lines are scanned, and only within the first block. The
 * total file size is divided by the average length of the sampled lines.
 * The result is always at least 1.
 */
long xdl_guess_lines(mmfile_t *mf, long sample) {
	long lines = 0, blksz, sampled = 0;
	char const *base, *pos, *end, *nlp;

	base = xdl_mmfile_first(mf, &blksz);
	if (base != NULL) {
		pos = base;
		end = base + blksz;
		while (lines < sample && pos < end) {
			lines++;
			/* Jump past the next newline, or to the block end if none. */
			nlp = memchr(pos, '\n', end - pos);
			pos = nlp ? nlp + 1 : end;
		}
		sampled += (long) (pos - base);
	}

	if (lines != 0 && sampled != 0)
		lines = xdl_mmfile_size(mf) / (sampled / lines);

	return lines + 1;
}
/*
 * Create in `mmfc` a copy of `mmfo` whose data lives in a single region.
 *
 * `mmfc` is initialised with `bsize` and `flags`, a region as large as the
 * whole of `mmfo` is obtained with xdl_mmfile_writeallocate(), and every
 * block of `mmfo` is copied into it in order.
 *
 * Returns 0 on success, -1 on failure. If the allocation fails, `mmfc` is
 * released before returning.
 */
int xdl_mmfile_compact(mmfile_t *mmfo, mmfile_t *mmfc, long bsize, unsigned long flags) {
	long total = xdl_mmfile_size(mmfo), blklen;
	char *dst;
	char const *src;

	if (xdl_init_mmfile(mmfc, bsize, flags) < 0)
		return -1;

	dst = (char *) xdl_mmfile_writeallocate(mmfc, total);
	if (dst == NULL) {
		xdl_free_mmfile(mmfc);
		return -1;
	}

	for (src = (char const *) xdl_mmfile_first(mmfo, &blklen); src != NULL;
	     src = (char const *) xdl_mmfile_next(mmfo, &blklen)) {
		memcpy(dst, src, blklen);
		dst += blklen;
	}

	return 0;
}
/*
 * Write the contents of `mmf` to the file named `fname`.
 *
 * The file is created if missing (mode 0644) and truncated if it already
 * exists, so the result holds exactly the bytes of `mmf`. Blocks are written
 * in order; a short or failed write() is treated as an error.
 *
 * Returns 0 on success. On failure prints a message with perror(fname) and
 * returns -1.
 */
int xdlt_dump_mmfile(char const *fname, mmfile_t *mmf) {
	int fd;
	long size;
	char *blk;

	/*
	 * O_TRUNC is required: without it, dumping over a longer existing file
	 * would leave that file's old tail after the new data.
	 */
	if ((fd = open(fname, O_CREAT | O_WRONLY | O_TRUNC, 0644)) == -1) {
		perror(fname);
		return -1;
	}
	if ((blk = (char *) xdl_mmfile_first(mmf, &size)) != NULL) {
		do {
			if (write(fd, blk, (size_t) size) != (size_t) size) {
				perror(fname);
				close(fd);
				return -1;
			}
		} while ((blk = (char *) xdl_mmfile_next(mmf, &size)) != NULL);
	}
	close(fd);

	return 0;
}
/*
 * Apply the binary patch stored in `mmfp` to the original file `mmf`,
 * emitting the resulting data through the callback `ecb->outf`.
 *
 * The first block of `mmfp` must hold at least XDL_BPATCH_HDR_SIZE bytes.
 * Two 32-bit values are read from it with XDL_LE32_GET (at offsets 0 and 4)
 * and compared against xdl_mmf_adler32(mmf) and xdl_mmfile_size(mmf); a
 * mismatch in either aborts with -1.
 *
 * After the header, each block is decoded as a sequence of operations
 * selected by the leading byte:
 *   XDL_BDOP_INS  - one length byte followed by that many literal bytes,
 *                   passed to ecb->outf;
 *   XDL_BDOP_INSB - a 32-bit length followed by that many literal bytes,
 *                   passed to ecb->outf;
 *   XDL_BDOP_CPY  - a 32-bit offset and a 32-bit size, forwarded to
 *                   xdl_copy_range(mmf, off, size, ecb).
 * Any other leading byte is rejected.
 *
 * Returns 0 on success, -1 on a malformed header, checksum/size mismatch,
 * unknown operation, or when an emit/copy step reports failure.
 *
 * NOTE(review): operands are read without checking that they fit before
 * `top`, and each operation is decoded within a single block -- this appears
 * to assume a well-formed patch whose operations never straddle blocks;
 * confirm before feeding untrusted patch data.
 */
int xdl_bpatch(mmfile_t *mmf, mmfile_t *mmfp, xdemitcb_t *ecb) { long size, off, csize, osize; unsigned long fp, ofp; char const *blk; unsigned char const *data, *top; mmbuffer_t mb; if ((blk = (char const *) xdl_mmfile_first(mmfp, &size)) == NULL || size < XDL_BPATCH_HDR_SIZE) { return -1; } ofp = xdl_mmf_adler32(mmf); osize = xdl_mmfile_size(mmf); XDL_LE32_GET(blk, fp); XDL_LE32_GET(blk + 4, csize); if (fp != ofp || csize != osize) { return -1; } blk += XDL_BPATCH_HDR_SIZE; size -= XDL_BPATCH_HDR_SIZE; do { for (data = (unsigned char const *) blk, top = data + size; data < top;) { if (*data == XDL_BDOP_INS) { data++; mb.size = (long) *data++; mb.ptr = (char *) data; data += mb.size; if (ecb->outf(ecb->priv, &mb, 1) < 0) { return -1; } } else if (*data == XDL_BDOP_INSB) { data++; XDL_LE32_GET(data, csize); data += 4; mb.size = csize; mb.ptr = (char *) data; data += mb.size; if (ecb->outf(ecb->priv, &mb, 1) < 0) { return -1; } } else if (*data == XDL_BDOP_CPY) { data++; XDL_LE32_GET(data, off); data += 4; XDL_LE32_GET(data, csize); data += 4; if (xdl_copy_range(mmf, off, csize, ecb) < 0) { return -1; } } else { return -1; } } } while ((blk = (char const *) xdl_mmfile_next(mmfp, &size)) != NULL); return 0; }
/*
 * Scan the memory file `mf` and fill `xdf` with one record per line plus the
 * auxiliary arrays used later by the diff.
 *
 * Steps, in order:
 *   1. Initialise the record allocator xdf->rcha (element size
 *      sizeof(xrecord_t), narec / 4 + 1 as the third argument).
 *   2. Allocate the record pointer array (`narec` entries, doubled on
 *      demand) and a hash table of 1 << xdl_hashbits(narec) buckets, all
 *      set to NULL.
 *   3. For every block of `mf`, call xdl_hash_record(&cur, top, xpp->flags)
 *      repeatedly. The record is taken to span from the value of `cur`
 *      before the call to its value after the call, so the helper is
 *      expected to advance `cur` past the record (defined elsewhere --
 *      confirm). Each record is allocated from xdf->rcha, stored in `recs`,
 *      and passed to xdl_classify_record().
 *   4. Allocate `rchg` (nrec + 2 bytes, zeroed), `rindex` (nrec + 1 longs)
 *      and `ha` (nrec + 1 unsigned longs).
 *
 * On success the fields of `xdf` are populated and 0 is returned. Note that
 * xdf->rchg is stored as `rchg + 1`, i.e. it points at the second byte of
 * the allocation; dstart/dend are set to 0 and nrec - 1, nreff to 0.
 *
 * On any failure everything allocated so far is released and -1 is
 * returned.
 */
static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp, xdlclassifier_t *cf, xdfile_t *xdf) { unsigned int hbits; long i, nrec, hsize, bsize; unsigned long hav; char const *blk, *cur, *top, *prev; xrecord_t *crec; xrecord_t **recs, **rrecs; xrecord_t **rhash; unsigned long *ha; char *rchg; long *rindex; if (xdl_cha_init(&xdf->rcha, sizeof(xrecord_t), narec / 4 + 1) < 0) { return -1; } if (!(recs = (xrecord_t **) xdl_malloc(narec * sizeof(xrecord_t *)))) { xdl_cha_free(&xdf->rcha); return -1; } hbits = xdl_hashbits((unsigned int) narec); hsize = 1 << hbits; if (!(rhash = (xrecord_t **) xdl_malloc(hsize * sizeof(xrecord_t *)))) { xdl_free(recs); xdl_cha_free(&xdf->rcha); return -1; } for (i = 0; i < hsize; i++) rhash[i] = NULL; nrec = 0; if ((cur = blk = xdl_mmfile_first(mf, &bsize)) != NULL) { for (top = blk + bsize;;) { if (cur >= top) { if (!(cur = blk = xdl_mmfile_next(mf, &bsize))) break; top = blk + bsize; } prev = cur; hav = xdl_hash_record(&cur, top, xpp->flags); if (nrec >= narec) { narec *= 2; if (!(rrecs = (xrecord_t **) xdl_realloc(recs, narec * sizeof(xrecord_t *)))) { xdl_free(rhash); xdl_free(recs); xdl_cha_free(&xdf->rcha); return -1; } recs = rrecs; } if (!(crec = xdl_cha_alloc(&xdf->rcha))) { xdl_free(rhash); xdl_free(recs); xdl_cha_free(&xdf->rcha); return -1; } crec->ptr = prev; crec->size = (long) (cur - prev); crec->ha = hav; recs[nrec++] = crec; if (xdl_classify_record(cf, rhash, hbits, crec) < 0) { xdl_free(rhash); xdl_free(recs); xdl_cha_free(&xdf->rcha); return -1; } } } if (!(rchg = (char *) xdl_malloc((nrec + 2) * sizeof(char)))) { xdl_free(rhash); xdl_free(recs); xdl_cha_free(&xdf->rcha); return -1; } memset(rchg, 0, (nrec + 2) * sizeof(char)); if (!(rindex = (long *) xdl_malloc((nrec + 1) * sizeof(long)))) { xdl_free(rchg); xdl_free(rhash); xdl_free(recs); xdl_cha_free(&xdf->rcha); return -1; } if (!(ha = (unsigned long *) xdl_malloc((nrec + 1) * sizeof(unsigned long)))) { xdl_free(rindex); xdl_free(rchg); 
xdl_free(rhash); xdl_free(recs); xdl_cha_free(&xdf->rcha); return -1; } xdf->nrec = nrec; xdf->recs = recs; xdf->hbits = hbits; xdf->rhash = rhash; xdf->rchg = rchg + 1; xdf->rindex = rindex; xdf->nreff = 0; xdf->ha = ha; xdf->dstart = 0; xdf->dend = nrec - 1; return 0; }
/*
 * Regression test for multi-step binary patching.
 *
 * Layout of the `mf` array (n + 2 entries):
 *   mf[0]        - the initial file created by xdlt_create_file(mf, size);
 *   mf[1..n]     - compacted binary diffs, one per generated version;
 *   mf[n + 1]    - output file that receives the result of the multi-patch.
 * mbb[k] points at the first block of mf[k] for k in 0..n.
 *
 * For each of the `n` rounds the current version `mfx` (initially mf[0]) is
 * altered with xdlt_change_file(rmod, chmax), compacted into `mff`, diffed
 * against `mfx` with xdlt_do_bindiff(), and the diff is compacted into the
 * next slot of `mf`. The previous version is released unless it is mf[0],
 * and `mfx` becomes the new version (held in the local `mfb`).
 *
 * Finally xdl_bpatch_multi(&mbb[0], &mbb[1], n, &ecb) is run with output
 * directed into mf[n + 1] via xdlt_mmfile_outf, and when it returns 0 the
 * output is compared to the last version with xdl_mmfile_cmp().
 *
 * Returns the result of that final step (the xdl_bpatch_multi() result if
 * non-zero, otherwise the xdl_mmfile_cmp() result), or -1 if any allocation
 * or intermediate step fails. All files and arrays created here are
 * released on every path.
 */
int xdlt_auto_mbinregress(bdiffparam_t const *bdp, long size, double rmod, int chmax, int n) { int i, res; mmbuffer_t *mbb; mmfile_t *mf, *mfc, *mfx; mmfile_t mfn, mff, mfd, mfb; xdemitcb_t ecb; if ((mbb = (mmbuffer_t *) xdl_malloc((n + 2) * sizeof(mmbuffer_t))) == NULL) { return -1; } if ((mf = mfc = (mmfile_t *) xdl_malloc((n + 2) * sizeof(mmfile_t))) == NULL) { xdl_free(mbb); return -1; } if (xdlt_create_file(mfc, size) < 0) { xdl_free(mf); xdl_free(mbb); return -1; } mbb[0].ptr = (char *) xdl_mmfile_first(mfc, &mbb[0].size); mfc++; mfx = mf; for (i = 0; i < n; i++) { if (xdlt_change_file(mfx, &mfn, rmod, chmax) < 0) { if (mfx != mf) xdl_free_mmfile(mfx); for (; i >= 0; i--) xdl_free_mmfile(mf + i); xdl_free(mf); xdl_free(mbb); return -1; } if (xdl_mmfile_compact(&mfn, &mff, XDLT_STD_BLKSIZE, XDL_MMF_ATOMIC) < 0) { xdl_free_mmfile(&mfn); if (mfx != mf) xdl_free_mmfile(mfx); for (; i >= 0; i--) xdl_free_mmfile(mf + i); xdl_free(mf); xdl_free(mbb); return -1; } xdl_free_mmfile(&mfn); if (xdlt_do_bindiff(mfx, &mff, bdp, &mfd) < 0) { xdl_free_mmfile(&mff); if (mfx != mf) xdl_free_mmfile(mfx); for (; i >= 0; i--) xdl_free_mmfile(mf + i); xdl_free(mf); xdl_free(mbb); return -1; } if (mfx != mf) xdl_free_mmfile(mfx); mfx = &mfb; *mfx = mff; if (xdl_mmfile_compact(&mfd, mfc, XDLT_STD_BLKSIZE, XDL_MMF_ATOMIC) < 0) { xdl_free_mmfile(&mfd); xdl_free_mmfile(mfx); for (; i >= 0; i--) xdl_free_mmfile(mf + i); xdl_free(mf); xdl_free(mbb); return -1; } mbb[i + 1].ptr = (char *) xdl_mmfile_first(mfc, &mbb[i + 1].size); mfc++; xdl_free_mmfile(&mfd); } if (xdl_init_mmfile(mfc, XDLT_STD_BLKSIZE, XDL_MMF_ATOMIC) < 0) { xdl_free_mmfile(mfx); for (i = n; i >= 0; i--) xdl_free_mmfile(mf + i); xdl_free(mf); xdl_free(mbb); return -1; } ecb.priv = mfc; ecb.outf = xdlt_mmfile_outf; if ((res = xdl_bpatch_multi(&mbb[0], &mbb[1], n, &ecb)) == 0) res = xdl_mmfile_cmp(mfx, mfc); xdl_free_mmfile(mfx); for (i = n + 1; i >= 0; i--) xdl_free_mmfile(mf + i); xdl_free(mf); xdl_free(mbb); return res; }