static int xdl_classify_record(xdlclassifier_t *cf, xrecord_t **rhash, unsigned int hbits, xrecord_t *rec) { long hi; char const *line; xdlclass_t *rcrec; line = rec->ptr; hi = (long) XDL_HASHLONG(rec->ha, cf->hbits); for (rcrec = cf->rchash[hi]; rcrec; rcrec = rcrec->next) if (rcrec->ha == rec->ha && xdl_recmatch(rcrec->line, rcrec->size, rec->ptr, rec->size, cf->flags)) break; if (!rcrec) { if (!(rcrec = xdl_cha_alloc(&cf->ncha))) { return -1; } rcrec->idx = cf->count++; rcrec->line = line; rcrec->size = rec->size; rcrec->ha = rec->ha; rcrec->next = cf->rchash[hi]; cf->rchash[hi] = rcrec; } rec->ha = (unsigned long) rcrec->idx; hi = (long) XDL_HASHLONG(rec->ha, hbits); rec->next = rhash[hi]; rhash[hi] = rec; return 0; }
static int scanA(struct histindex *index, int line1, int count1) { int ptr, tbl_idx; unsigned int chain_len; struct record **rec_chain, *rec; for (ptr = LINE_END(1); line1 <= ptr; ptr--) { tbl_idx = TABLE_HASH(index, 1, ptr); rec_chain = index->records + tbl_idx; rec = *rec_chain; chain_len = 0; while (rec) { if (CMP(index, 1, rec->ptr, 1, ptr)) { /* * ptr is identical to another element. Insert * it onto the front of the existing element * chain. */ NEXT_PTR(index, ptr) = rec->ptr; rec->ptr = ptr; /* cap rec->cnt at MAX_CNT */ rec->cnt = XDL_MIN(MAX_CNT, rec->cnt + 1); LINE_MAP(index, ptr) = rec; goto continue_scan; } rec = rec->next; chain_len++; } if (chain_len == index->max_chain_length) return -1; /* * This is the first time we have ever seen this particular * element in the sequence. Construct a new chain for it. */ if (!(rec = xdl_cha_alloc(&index->rcha))) return -1; rec->ptr = ptr; rec->cnt = 1; rec->next = *rec_chain; *rec_chain = rec; LINE_MAP(index, ptr) = rec; continue_scan: ; /* no op */ } return 0; }
static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp, xdlclassifier_t *cf, xdfile_t *xdf) { unsigned int hbits; long i, nrec, hsize, bsize; unsigned long hav; char const *blk, *cur, *top, *prev; xrecord_t *crec; xrecord_t **recs, **rrecs; xrecord_t **rhash; unsigned long *ha; char *rchg; long *rindex; if (xdl_cha_init(&xdf->rcha, sizeof(xrecord_t), narec / 4 + 1) < 0) { return -1; } if (!(recs = (xrecord_t **) xdl_malloc(narec * sizeof(xrecord_t *)))) { xdl_cha_free(&xdf->rcha); return -1; } hbits = xdl_hashbits((unsigned int) narec); hsize = 1 << hbits; if (!(rhash = (xrecord_t **) xdl_malloc(hsize * sizeof(xrecord_t *)))) { xdl_free(recs); xdl_cha_free(&xdf->rcha); return -1; } for (i = 0; i < hsize; i++) rhash[i] = NULL; nrec = 0; if ((cur = blk = xdl_mmfile_first(mf, &bsize)) != NULL) { for (top = blk + bsize;;) { if (cur >= top) { if (!(cur = blk = xdl_mmfile_next(mf, &bsize))) break; top = blk + bsize; } prev = cur; hav = xdl_hash_record(&cur, top, xpp->flags); if (nrec >= narec) { narec *= 2; if (!(rrecs = (xrecord_t **) xdl_realloc(recs, narec * sizeof(xrecord_t *)))) { xdl_free(rhash); xdl_free(recs); xdl_cha_free(&xdf->rcha); return -1; } recs = rrecs; } if (!(crec = xdl_cha_alloc(&xdf->rcha))) { xdl_free(rhash); xdl_free(recs); xdl_cha_free(&xdf->rcha); return -1; } crec->ptr = prev; crec->size = (long) (cur - prev); crec->ha = hav; recs[nrec++] = crec; if (xdl_classify_record(cf, rhash, hbits, crec) < 0) { xdl_free(rhash); xdl_free(recs); xdl_cha_free(&xdf->rcha); return -1; } } } if (!(rchg = (char *) xdl_malloc((nrec + 2) * sizeof(char)))) { xdl_free(rhash); xdl_free(recs); xdl_cha_free(&xdf->rcha); return -1; } memset(rchg, 0, (nrec + 2) * sizeof(char)); if (!(rindex = (long *) xdl_malloc((nrec + 1) * sizeof(long)))) { xdl_free(rchg); xdl_free(rhash); xdl_free(recs); xdl_cha_free(&xdf->rcha); return -1; } if (!(ha = (unsigned long *) xdl_malloc((nrec + 1) * sizeof(unsigned long)))) { xdl_free(rindex); xdl_free(rchg); xdl_free(rhash); xdl_free(recs); xdl_cha_free(&xdf->rcha); return -1; } xdf->nrec = nrec; xdf->recs = recs; xdf->hbits = hbits; xdf->rhash = rhash; xdf->rchg = rchg + 1; xdf->rindex = rindex; xdf->nreff = 0; xdf->ha = ha; xdf->dstart = 0; xdf->dend = nrec - 1; return 0; }