/*
 * @emph{ALIGNsynced} reports whether the head columns of two BATs are
 * known to be aligned.
 *
 * Returns 1 when alignment can be established and 0 otherwise.  A 0
 * answer therefore means either "proven different" or "unknown".
 * Both arguments are validated with BATcheck, which is handed 0 as its
 * failure value.
 */
int
ALIGNsynced(BAT *b1, BAT *b2)
{
	BATcheck(b1, "ALIGNsynced: bat 1 required", 0);
	BATcheck(b2, "ALIGNsynced: bat 2 required", 0);

	/* phase 1: cheap disproofs -- differing cardinality or head type */
	if (BATcount(b1) != BATcount(b2) ||
	    ATOMtype(BAThtype(b1)) != ATOMtype(BAThtype(b2)))
		return 0;

	/* two void heads are decided entirely by their sequence bases */
	if (BAThvoid(b1) && BAThvoid(b2))
		return (b1->hseqbase == b2->hseqbase);

	/* phase 2: cheap proofs */
	if (b1->batCacheid == b2->batCacheid ||	/* the very same bat */
	    BATcount(b1) == 0 ||	/* empty bats of the same type */
	    (b1->halign && b1->halign == b2->halign))	/* marked equal by algorithmics */
		return 1;

	/* phase 3: a view is in sync with whatever its head parent is in
	 * sync with; try the left operand's parent first, then the
	 * right operand's parent.  Anything else: we simply don't know. */
	return (VIEWparentcol(b1) &&
		ALIGNsynced(BBPcache(VIEWhparent(b1)), b2)) ||
	       (VIEWparentcol(b2) &&
		ALIGNsynced(b1, BBPcache(VIEWhparent(b2))));
}
/*
 * @#VIEWunlink@ drops the pointers a view shares with its parent(s):
 * variable-sized heaps, properties, hash accelerators and imprints.
 * It is one step of the view destroy sequence.
 *
 * A NULL argument, or a BAT for which no parent descriptor can be
 * found in the BBP cache, is left untouched.
 */
static void
VIEWunlink(BAT *b)
{
	bat hpid, tpid, vhpid, vtpid;
	BAT *head_par, *tail_par, *vhead_par, *vtail_par;

	if (b == NULL)
		return;

	hpid = VIEWhparent(b);
	tpid = VIEWtparent(b);
	vhpid = VIEWvhparent(b);
	vtpid = VIEWvtparent(b);

	head_par = hpid ? BBP_cache(hpid) : NULL;
	tail_par = tpid ? BBP_cache(tpid) : NULL;

	/* when no separate var-heap parent is recorded, use the column
	 * parent in its place */
	if (hpid && !vhpid)
		vhpid = hpid;
	vhead_par = vhpid ? BBP_cache(vhpid) : NULL;
	if (tpid && !vtpid)
		vtpid = tpid;
	vtail_par = vtpid ? BBP_cache(vtpid) : NULL;

	/* nothing cached to unlink from */
	if (!head_par && !tail_par && !vhead_par && !vtail_par)
		return;

	/* var-heaps: forget those whose recorded owner is another bat */
	assert(b->H->vheap == NULL || b->H->vheap->parentid > 0);
	assert(b->T->vheap == NULL || b->T->vheap->parentid > 0);
	if (b->H->vheap && b->H->vheap->parentid != abs(b->batCacheid))
		b->H->vheap = NULL;
	if (b->T->vheap && b->T->vheap->parentid != abs(b->batCacheid))
		b->T->vheap = NULL;

	/* head column: clear every pointer that is really the parent's */
	if (head_par) {
		if (b->H->props && b->H->props == head_par->H->props)
			b->H->props = NULL;
		if (b->H->hash && b->H->hash == head_par->H->hash)
			b->H->hash = NULL;
		if (b->H->imprints && b->H->imprints == head_par->H->imprints)
			b->H->imprints = NULL;
	}

	/* tail column: compared against the head side of the tail parent */
	if (tail_par) {
		if (b->T->props && b->T->props == tail_par->H->props)
			b->T->props = NULL;
		if (b->T->hash && b->T->hash == tail_par->H->hash)
			b->T->hash = NULL;
		if (b->T->imprints && b->T->imprints == tail_par->H->imprints)
			b->T->imprints = NULL;
	}
}
/*
 * @+ View BATS
 * @emph{VIEWcreate} is the general way to obtain a 'view' BAT on top
 * of another BAT.  The result is a @emph{#read-only} BAT (the kernel
 * supports this) from which vertical slices can be taken; use
 * @emph{VIEWhead} for that.
 *
 * A view may be created on a writable BAT.  Updates to the parent are
 * then automatically visible through the VIEW, but the VIEW bat itself
 * can never be modified.
 *
 * Horizontal views should only be handed out on a view BAT, and only
 * when it is certain that the parent BAT is read-only: they cannot
 * physically share the batBuns heap with the parent because they need
 * a modified version of it.
 *
 * VIEWhcreate builds a read-only view that carries a copy of the head
 * column descriptor of @emph{h}; its descriptor is created with a
 * TYPE_void tail.  It returns the new view, or NULL when no descriptor
 * could be created.  The argument is validated with BATcheck, which is
 * handed NULL as its failure value.
 */
static BAT *
VIEWhcreate(BAT *h)
{
	BATstore *store;
	BAT *view;
	bat parent;

	BATcheck(h, "VIEWhcreate", NULL);

	store = BATcreatedesc(h->htype, TYPE_void, FALSE, TRANSIENT);
	if (store == NULL)
		return NULL;
	view = &store->B;
	BATsetdims(view);

	/* Pick the bat recorded as head parent: h itself when its heap was
	 * copied or when a non-void h has no parent of its own, nothing
	 * for a void head, and h's own parent otherwise. */
	parent = VIEWhparent(h);
	if (h->H->heap.copied || (h->htype != TYPE_void && parent == 0))
		parent = h->batCacheid;
	else if (h->htype == TYPE_void)
		parent = 0;

	if (parent)
		BBPshare(parent);

	/* take over the complete head descriptor and the bun bookkeeping */
	*view->H = *h->H;
	view->batDeleted = h->batDeleted;
	view->batFirst = h->batFirst;
	view->batInserted = h->batInserted;
	view->batCount = h->batCount;
	view->batCapacity = h->batCapacity;

	if (view->H->vheap) {
		assert(h->H->vheap);
		assert(view->H->vheap->parentid != 0);
		view->H->vheap->farmid = h->H->vheap->farmid;
		BBPshare(view->H->vheap->parentid);
	}

	/* fix up the fields that must not be inherited verbatim */
	view->H->props = NULL;
	view->H->heap.copied = 0;
	if (parent) {
		view->H->heap.parentid = parent;
		if (isVIEW(h))
			view->H->hash = NULL;
	}
	BATinit_idents(view);

	/* these bits are set one by one rather than copied wholesale */
	view->batDirty = BATdirty(h);
	view->batRestricted = BAT_READ;

	BBPcacheit(store, 1);	/* register in the BBP */
	return view;
}
/* * The prime routine for the BAT layer is to create a new hash index. * Its argument is the element type and the maximum number of BUNs be * stored under the hash function. */ BAT * BAThash(BAT *b, BUN masksize) { BAT *o = NULL; lng t0,t1; (void) t0; (void) t1; if (VIEWhparent(b)) { bat p = VIEWhparent(b); o = b; b = BATdescriptor(p); if (!ALIGNsynced(o, b) || BUNfirst(o) != BUNfirst(b)) { BBPunfix(b->batCacheid); b = o; o = NULL; } } MT_lock_set(&GDKhashLock(ABS(b->batCacheid)), "BAThash"); if (b->H->hash == NULL) { unsigned int tpe = ATOMstorage(b->htype); BUN cnt = BATcount(b); BUN mask; BUN p = BUNfirst(b), q = BUNlast(b), r; Hash *h = NULL; Heap *hp = NULL; str nme = BBP_physical(b->batCacheid); BATiter bi = bat_iterator(b); ALGODEBUG fprintf(stderr, "#BAThash: create hash(" BUNFMT ");\n", BATcount(b)); /* cnt = 0, hopefully there is a proper capacity from * which we can derive enough information */ if (!cnt) cnt = BATcapacity(b); if (b->htype == TYPE_void) { if (b->hseqbase == oid_nil) { MT_lock_unset(&GDKhashLock(ABS(b->batCacheid)), "BAThash"); ALGODEBUG fprintf(stderr, "#BAThash: cannot create hash-table on void-NIL column.\n"); return NULL; } ALGODEBUG fprintf(stderr, "#BAThash: creating hash-table on void column..\n"); tpe = TYPE_void; } /* determine hash mask size p = first; then no dynamic * scheme */ if (masksize > 0) { mask = HASHmask(masksize); } else if (ATOMsize(ATOMstorage(tpe)) == 1) { mask = (1 << 8); } else if (ATOMsize(ATOMstorage(tpe)) == 2) { mask = (1 << 12); } else if (b->hkey) { mask = HASHmask(cnt); } else { /* dynamic hash: we start with * HASHmask(cnt/64); if there are too many * collisions we try HASHmask(cnt/16), then * HASHmask(cnt/4), and finally * HASHmask(cnt). 
*/ mask = HASHmask(cnt >> 6); p += (cnt >> 2); /* try out on first 25% of b */ if (p > q) p = q; } if (mask < 1024) mask = 1024; t0 = GDKusec(); do { BUN nslots = mask >> 3; /* 1/8 full is too full */ r = BUNfirst(b); if (hp) { HEAPfree(hp); GDKfree(hp); } if (h) { ALGODEBUG fprintf(stderr, "#BAThash: retry hash construction\n"); GDKfree(h); } /* create the hash structures */ hp = (Heap *) GDKzalloc(sizeof(Heap)); if (hp && (hp->filename = GDKmalloc(strlen(nme) + 12)) != NULL) sprintf(hp->filename, "%s.%chash", nme, b->batCacheid > 0 ? 'h' : 't'); if (hp == NULL || hp->filename == NULL || (h = HASHnew(hp, ATOMtype(b->htype), BATcapacity(b), mask)) == NULL) { MT_lock_unset(&GDKhashLock(ABS(b->batCacheid)), "BAThash"); if (hp != NULL) { GDKfree(hp->filename); GDKfree(hp); } return NULL; } switch (tpe) { case TYPE_bte: starthash(bte); break; case TYPE_sht: starthash(sht); break; case TYPE_int: case TYPE_flt: starthash(int); break; case TYPE_dbl: case TYPE_lng: starthash(lng); break; default: for (; r < p; r++) { ptr v = BUNhead(bi, r); BUN c = (BUN) heap_hash_any(b->H->vheap, h, v); if ( HASHget(h,c) == HASHnil(h) && nslots-- == 0) break; /* mask too full */ HASHputlink(h,r, HASHget(h,c)); HASHput(h,c, r); } break; } } while (r < p && mask < cnt && (mask <<= 2)); /* finish the hashtable with the current mask */ p = r; switch (tpe) { case TYPE_bte: finishhash(bte); break; case TYPE_sht: finishhash(sht); break; case TYPE_int: case TYPE_flt: finishhash(int); break; case TYPE_dbl: case TYPE_lng: finishhash(lng); break; default: for (; p < q; p++) { ptr v = BUNhead(bi, p); BUN c = (BUN) heap_hash_any(b->H->vheap, h, v); HASHputlink(h,p, HASHget(h,c)); HASHput(h,c,p); } break; } b->H->hash = h; t1 = GDKusec(); ALGODEBUG fprintf(stderr, "#BAThash: hash construction "LLFMT" usec\n", t1-t0); ALGODEBUG HASHcollisions(b,b->H->hash); }
/*
 * Materialize a view into a normal BAT. If it is a slice, we really
 * want to reduce storage of the new BAT.
 *
 * Returns GDK_FAIL when b is NULL, when the normalized descriptor
 * cannot be obtained, or when allocating the replacement heaps, their
 * file names, the var-heap descriptors or the temporary view fails.
 * Returns GDK_SUCCEED otherwise, including the case where b has
 * neither a head nor a tail parent (nothing to do).
 *
 * Every allocation that can fail is performed before the BAT is cut
 * loose from its parents.  Once VIEWunlink has run there is no failure
 * exit any more, so the bailout path never releases heaps that have
 * already been installed in the BAT.
 */
gdk_return
VIEWreset(BAT *b)
{
	bat hp, tp, hvp, tvp;
	Heap head, tail, hh, th;
	Heap *hvheap = NULL, *tvheap = NULL;	/* private var-heap descriptors */
	BAT *n = NULL, *v = NULL;

	if (b == NULL)
		return GDK_FAIL;
	hp = VIEWhparent(b);
	tp = VIEWtparent(b);
	hvp = VIEWvhparent(b);
	tvp = VIEWvtparent(b);
	if (hp || tp) {
		BAT *m;
		BATstore *bs;
		BUN cnt;
		str nme;
		size_t nmelen;

		/* alloc heaps; zeroed first so bailout may free them at
		 * any stage */
		memset(&head, 0, sizeof(Heap));
		memset(&tail, 0, sizeof(Heap));
		memset(&hh, 0, sizeof(Heap));
		memset(&th, 0, sizeof(Heap));

		n = BATdescriptor(abs(b->batCacheid));	/* normalized */
		if (n == NULL)
			goto bailout;
		m = BATmirror(n);	/* mirror of normalized */
		bs = BBP_desc(n->batCacheid);
		cnt = BATcount(n) + 1;
		nme = BBP_physical(n->batCacheid);
		nmelen = nme ? strlen(nme) : 0;

		assert(n->batCacheid > 0);
		assert(hp || !b->htype);
		assert(tp || !b->ttype);

		head.farmid = BBPselectfarm(n->batRole, n->htype, offheap);
		tail.farmid = BBPselectfarm(n->batRole, n->ttype, offheap);
		if (n->htype) {
			head.filename = (str) GDKmalloc(nmelen + 12);
			if (head.filename == NULL)
				goto bailout;
			snprintf(head.filename, nmelen + 12, "%s.head", nme);
			if (HEAPalloc(&head, cnt, Hsize(n)) != GDK_SUCCEED)
				goto bailout;
		}
		if (n->ttype) {
			tail.filename = (str) GDKmalloc(nmelen + 12);
			if (tail.filename == NULL)
				goto bailout;
			snprintf(tail.filename, nmelen + 12, "%s.tail", nme);
			if (HEAPalloc(&tail, cnt, Tsize(n)) != GDK_SUCCEED)
				goto bailout;
		}
		if (n->H->vheap) {
			hh.farmid = BBPselectfarm(n->batRole, n->htype, varheap);
			hh.filename = (str) GDKmalloc(nmelen + 12);
			if (hh.filename == NULL)
				goto bailout;
			snprintf(hh.filename, nmelen + 12, "%s.hheap", nme);
			if (ATOMheap(n->htype, &hh, cnt) != GDK_SUCCEED)
				goto bailout;
		}
		if (n->T->vheap) {
			th.farmid = BBPselectfarm(n->batRole, n->ttype, varheap);
			th.filename = (str) GDKmalloc(nmelen + 12);
			if (th.filename == NULL)
				goto bailout;
			snprintf(th.filename, nmelen + 12, "%s.theap", nme);
			if (ATOMheap(n->ttype, &th, cnt) != GDK_SUCCEED)
				goto bailout;
		}

		/* Reserve the descriptors for the private var-heaps now,
		 * while failing is still harmless. */
		if (hh.base) {
			hvheap = (Heap *) GDKzalloc(sizeof(Heap));
			if (hvheap == NULL)
				goto bailout;
		}
		if (th.base) {
			tvheap = (Heap *) GDKzalloc(sizeof(Heap));
			if (tvheap == NULL)
				goto bailout;
		}

		v = VIEWcreate(n, n);
		if (v == NULL)
			goto bailout;

		/* ---- point of no return: nothing below may fail ---- */

		/* cut the link to your parents */
		VIEWunlink(n);
		if (hp) {
			BBPunshare(hp);
			BBPunfix(hp);
		}
		if (tp) {
			BBPunshare(tp);
			BBPunfix(tp);
		}
		if (hvp) {
			BBPunshare(hvp);
			BBPunfix(hvp);
		}
		if (tvp) {
			BBPunshare(tvp);
			BBPunfix(tvp);
		}

		/* make sure everything points there */
		m->S = n->S = &bs->S;
		m->T = n->H = &bs->H;
		m->H = n->T = &bs->T;

		n->H->type = v->H->type;
		n->H->varsized = v->H->varsized;
		n->H->shift = v->H->shift;
		n->H->width = v->H->width;
		n->H->seq = v->H->seq;

		n->T->type = v->T->type;
		n->T->varsized = v->T->varsized;
		n->T->shift = v->T->shift;
		n->T->width = v->T->width;
		n->T->seq = v->T->seq;

		n->H->heap.parentid = n->T->heap.parentid = 0;
		n->batRestricted = BAT_WRITE;

		/* reset BOUND2KEY */
		n->H->key = BAThkey(v);
		n->T->key = BATtkey(v);

		/* copy the heaps */
		n->H->heap = head;
		n->T->heap = tail;

		/* unshare from parents heap: install the pre-allocated
		 * descriptors, which the BAT owns from here on */
		if (hvheap) {
			assert(n->H->vheap == NULL);
			*hvheap = hh;
			hvheap->parentid = n->batCacheid;
			n->H->vheap = hvheap;
			hvheap = NULL;
		}
		if (tvheap) {
			assert(n->T->vheap == NULL);
			*tvheap = th;
			tvheap->parentid = n->batCacheid;
			n->T->vheap = tvheap;
			tvheap = NULL;
		}

		n->batSharecnt = 0;
		n->batCopiedtodisk = 0;
		n->batDirty = 1;

		/* reset BOUND2KEY */
		n->hkey = BAThkey(v);
		n->tkey = BATtkey(v);

		/* make the BAT empty and insert all again */
		DELTAinit(n);
		/* reset capacity */
		n->batCapacity = cnt;

		/* swap n and v in case the original input was reversed, because
		 * BATins demands (v)oid-headed input */
		if (b->batCacheid < 0) {
			n = m;
			m = BATmirror(v);
		} else {
			m = v;
		}
		/* insert all of v in n, and quit */
		BATins(n, m, FALSE);
		BBPreclaim(v);
		BBPunfix(n->batCacheid);
	}
	return GDK_SUCCEED;

      bailout:
	/* only reachable before the BAT was unlinked from its parents, so
	 * all four heaps are still purely local */
	BBPreclaim(v);
	if (n != NULL)
		BBPunfix(n->batCacheid);
	if (hvheap != NULL)
		GDKfree(hvheap);
	if (tvheap != NULL)
		GDKfree(tvheap);
	HEAPfree(&head, 0);
	HEAPfree(&tail, 0);
	HEAPfree(&hh, 0);
	HEAPfree(&th, 0);
	return GDK_FAIL;
}
/*
 * Create a read-only view BAT whose head column descriptor is copied
 * from the head of h and whose tail column descriptor is copied from
 * the tail of t.
 *
 * h, t        source BATs; both are validated with BATcheck, which is
 *             handed NULL as its failure value.
 * slice_view  when non-zero the view does not take over the hash
 *             accelerators of h and t; it is forced to 1 when h and t
 *             have different counts.
 *
 * Returns the new view, or NULL when no descriptor could be created.
 */
BAT *
VIEWcreate_(BAT *h, BAT *t, int slice_view)
{
	BATstore *bs;
	BAT *bn;
	bat hp = 0, tp = 0, vc = 0;

	BATcheck(h, "VIEWcreate_", NULL);
	BATcheck(t, "VIEWcreate_", NULL);

	/* differing counts: treat the result as a slice */
	if (BATcount(h) != BATcount(t))
		slice_view = 1;
	bs = BATcreatedesc(h->htype, t->ttype, FALSE, TRANSIENT);
	if (bs == NULL)
		return NULL;
	bn = &bs->B;

	/* Determine the parent ids.  A non-void column without a parent
	 * of its own, or one whose heap is flagged as copied, gets h
	 * (respectively the negated id of t) as its parent. */
	hp = VIEWhparent(h);
	tp = VIEWtparent(t);
	if ((hp == 0 && h->htype != TYPE_void) || h->H->heap.copied)
		hp = h->batCacheid;
	if ((tp == 0 && t->ttype != TYPE_void) || t->T->heap.copied)
		tp = -t->batCacheid;
	assert(h->htype != TYPE_void || !hp);
	assert(t->ttype != TYPE_void || !tp);
	/* the H and T column descriptors are fully copied. We need
	 * copies because in case of a mark, we are going to override
	 * a column with a void. Take care to zero the accelerator
	 * data, though. */
	*bn->H = *h->H;
	bn->batDeleted = h->batDeleted;
	bn->batFirst = h->batFirst;
	bn->batInserted = h->batInserted;
	bn->batCount = h->batCount;
	bn->batCapacity = h->batCapacity;
	/* fold a non-zero first position into the head heap base so
	 * that the view itself starts at position 0 */
	if (bn->batFirst > 0) {
		bn->H->heap.base += h->batFirst * h->H->width;
		bn->batFirst = 0;
	}
	if (h->H == t->T) {
		/* head and tail sources are the same column descriptor:
		 * let the view's tail alias its head as well */
		vc = 1;
		tp = hp;
		bn->T = bn->H;
	} else {
		*bn->T = *t->T;
		/* capacity is limited by the smaller of the two sources */
		if (bn->batCapacity > t->batCapacity)
			bn->batCapacity = t->batCapacity;
		/* same base adjustment as for the head, now for the tail */
		if (t->batFirst > 0)
			bn->T->heap.base += t->batFirst * t->T->width;
		if (bn->batCount < t->batCount) {
			/* we can't be sure anymore there are nils */
			bn->T->nil = 0;
		}
	}

	/* register the sharing with the column parents ... */
	if (hp)
		BBPshare(hp);
	if (tp)
		BBPshare(tp);
	/* ... and with the owners of the variable-sized heaps */
	if (bn->H->vheap) {
		assert(h->H->vheap);
		assert(bn->H->vheap->parentid > 0);
		bn->H->vheap->farmid = h->H->vheap->farmid;
		BBPshare(bn->H->vheap->parentid);
	}
	if (bn->T->vheap) {
		assert(t->T->vheap);
		assert(bn->T->vheap->parentid > 0);
		bn->T->vheap->farmid = t->T->vheap->farmid;
		BBPshare(bn->T->vheap->parentid);
	}

	/* note: H/T->heap's points into bs which was just overwritten
	 * with a copy from the parent(s). Clear the copied flag since
	 * our heap was not copied from our parent(s) even if our
	 * parent's heap was copied from its parent(s). */
	bn->H->heap.copied = bn->T->heap.copied = 0;
	bn->H->props = bn->T->props = NULL;

	/* correct values after copy of head and tail info */
	if (hp)
		bn->H->heap.parentid = hp;
	if (tp)
		bn->T->heap.parentid = tp;
	BATinit_idents(bn);
	/* Some bits must be copied individually. */
	bn->batDirty = BATdirty(h);
	bn->batRestricted = BAT_READ;
	if (slice_view || !hp || isVIEW(h))
		/* slices are unequal to their parents; cannot use accs */
		bn->H->hash = NULL;
	else
		/* equal pointers to parent mean view uses acc of parent */
		bn->H->hash = h->H->hash;
	/* same rule for the tail column */
	if (slice_view || !tp || isVIEW(t))
		bn->T->hash = NULL;
	else
		bn->T->hash = t->T->hash;
	/* imprints are shared, but the check is dynamic */
	bn->H->imprints = NULL;
	bn->T->imprints = NULL;
	BBPcacheit(bs, 1);	/* enter in BBP */
	/* View of VIEW combine, ie we need to fix the head of the mirror */
	if (vc) {
		BAT *bm = BATmirror(bn);

		bm->H = bn->H;
	}
	return bn;
}