/*
 * Search the tail column of a sorted or reverse-sorted BAT for value v.
 *
 * b     - BAT to search.  If b is NULL, or its tail is flagged neither
 *         tsorted nor trevsorted, BUN_NONE is returned.
 * v     - pointer to the value looked for.  For dense and void tails it
 *         is read as an oid.
 * which - FIND_FIRST, FIND_LAST, or any other value (handled by the
 *         `default' labels, i.e. FIND_ANY).
 *
 * Return value, as far as this function itself establishes it:
 *  FIND_ANY   - position of some BUN whose tail equals v, or BUN_NONE
 *               if the search ended with a non-zero comparison result
 *               or the BAT is empty.
 *  FIND_FIRST - if v was found: position of the first BUN equal to v
 *               (after stepping back over duplicates); lo if the BAT is
 *               empty or every value is already >= v (<= v when
 *               reverse sorted).
 *  FIND_LAST  - if v was found: position just past the last BUN equal
 *               to v; hi if the BAT is empty or every value is <= v
 *               (>= v when reverse sorted).
 * When v is not found for FIND_FIRST/FIND_LAST, the result is whatever
 * position the SORTfndloop macro leaves in cur.
 *
 * NOTE(review): SORTfndloop is a macro defined outside this block.  It
 * is not passed lo, hi, mid, cur, cmp, bi or v, so it presumably picks
 * them up from this scope, runs the binary search over [lo, hi), and
 * leaves the probe position in cur and the last comparison result in
 * cmp -- confirm against its definition before renaming any of these
 * locals (`mid' is not referenced anywhere else in this function).
 */
static BUN
SORTfndwhich(BAT *b, const void *v, enum find_which which)
{
	BUN lo, hi, mid;
	int cmp;
	BUN cur;
	BATiter bi;
	BUN diff, end;
	int tp;

	if (b == NULL || (!b->tsorted && !b->trevsorted))
		return BUN_NONE;

	/* search range is [lo, hi) */
	lo = BUNfirst(b);
	hi = BUNlast(b);

	if (BATtdense(b)) {
		/* no need for binary search on dense column: the position
		 * follows directly from the offset to tseqbase */
		if (*(const oid *) v < b->tseqbase ||
		    *(const oid *) v == oid_nil)
			/* below the range (or nil): not present */
			return which == FIND_ANY ? BUN_NONE : lo;
		if (*(const oid *) v >= b->tseqbase + BATcount(b))
			/* above the range: not present */
			return which == FIND_ANY ? BUN_NONE : hi;
		cur = (BUN) (*(const oid *) v - b->tseqbase) + lo;
		/* FIND_LAST reports the position just past the match */
		return cur + (which == FIND_LAST);
	}
	if (b->ttype == TYPE_void) {
		/* void tail that is not dense: the assert below requires
		 * a nil seqbase here */
		assert(b->tseqbase == oid_nil);
		switch (which) {
		case FIND_FIRST:
			if (*(const oid *) v == oid_nil)
				return lo;
			/* fall through: for a non-nil v, FIND_FIRST gives
			 * the same answer as FIND_LAST */
		case FIND_LAST:
			return hi;
		default:
			/* FIND_ANY: only nil can match, and only if the
			 * BAT is not empty */
			if (lo < hi && *(const oid *) v == oid_nil)
				return lo;
			return BUN_NONE;
		}
	}
	/* defaults in case the search loop never assigns them */
	cmp = 1;
	cur = BUN_NONE;
	bi = bat_iterator(b);
	/* only use storage type if comparison functions are equal */
	tp = ATOMbasetype(b->ttype);

	switch (which) {
	case FIND_FIRST:
		/* lower limit for stepping back over duplicates below */
		end = lo;
		if (lo >= hi ||
		    (b->tsorted ?
		     atom_GE(BUNtail(bi, lo), v, b->ttype) :
		     atom_LE(BUNtail(bi, lo), v, b->ttype))) {
			/* shortcut: if BAT is empty or first (and
			 * hence all) tail value is >= v (if sorted)
			 * or <= v (if revsorted), we're done */
			return lo;
		}
		break;
	case FIND_LAST:
		/* upper limit for stepping forward over duplicates below */
		end = hi;
		if (lo >= hi ||
		    (b->tsorted ?
		     atom_LE(BUNtail(bi, hi - 1), v, b->ttype) :
		     atom_GE(BUNtail(bi, hi - 1), v, b->ttype))) {
			/* shortcut: if BAT is empty or last (and
			 * hence all) tail value is <= v (if sorted)
			 * or >= v (if revsorted), we're done */
			return hi;
		}
		break;
	default: /* case FIND_ANY -- written as default so every path
		  * through the switch is covered */
		end = 0;	/* not used in this case */
		if (lo >= hi) {
			/* empty BAT: value not found */
			return BUN_NONE;
		}
		break;
	}

	/* The actual search.  Fixed-width base types get a type-specific
	 * instantiation with simple_CMP; everything else goes through
	 * atom_CMP, reading values with BUNtvar for variable-sized tails
	 * and BUNtloc otherwise.  For a reverse-sorted tail the comparison
	 * macro is passed with a leading minus sign, which presumably
	 * negates the comparison result so the same loop can be used --
	 * confirm against SORTfndloop. */
	if (b->tsorted) {
		switch (tp) {
		case TYPE_bte:
			SORTfndloop(bte, simple_CMP, BUNtloc);
			break;
		case TYPE_sht:
			SORTfndloop(sht, simple_CMP, BUNtloc);
			break;
		case TYPE_int:
			SORTfndloop(int, simple_CMP, BUNtloc);
			break;
		case TYPE_lng:
			SORTfndloop(lng, simple_CMP, BUNtloc);
			break;
#ifdef HAVE_HGE
		case TYPE_hge:
			SORTfndloop(hge, simple_CMP, BUNtloc);
			break;
#endif
		case TYPE_flt:
			SORTfndloop(flt, simple_CMP, BUNtloc);
			break;
		case TYPE_dbl:
			SORTfndloop(dbl, simple_CMP, BUNtloc);
			break;
		default:
			if (b->tvarsized)
				SORTfndloop(b->ttype, atom_CMP, BUNtvar);
			else
				SORTfndloop(b->ttype, atom_CMP, BUNtloc);
			break;
		}
	} else {
		switch (tp) {
		case TYPE_bte:
			SORTfndloop(bte, -simple_CMP, BUNtloc);
			break;
		case TYPE_sht:
			SORTfndloop(sht, -simple_CMP, BUNtloc);
			break;
		case TYPE_int:
			SORTfndloop(int, -simple_CMP, BUNtloc);
			break;
		case TYPE_lng:
			SORTfndloop(lng, -simple_CMP, BUNtloc);
			break;
#ifdef HAVE_HGE
		case TYPE_hge:
			SORTfndloop(hge, -simple_CMP, BUNtloc);
			break;
#endif
		case TYPE_flt:
			SORTfndloop(flt, -simple_CMP, BUNtloc);
			break;
		case TYPE_dbl:
			SORTfndloop(dbl, -simple_CMP, BUNtloc);
			break;
		default:
			if (b->tvarsized)
				SORTfndloop(b->ttype, -atom_CMP, BUNtvar);
			else
				SORTfndloop(b->ttype, -atom_CMP, BUNtloc);
			break;
		}
	}

	switch (which) {
	case FIND_FIRST:
		if (cmp == 0 && b->tkey == 0) {
			/* shift over multiple equals: v was found and
			 * the column is not flagged as key, so walk
			 * back towards `end' with a stride that is
			 * halved each round, for as long as the value
			 * at cur - diff still equals v */
			for (diff = cur - end; diff; diff >>= 1) {
				while (cur >= end + diff &&
				       atom_EQ(BUNtail(bi, cur - diff),
					       v, b->ttype))
					cur -= diff;
			}
		}
		break;
	case FIND_LAST:
		if (cmp == 0 && b->tkey == 0) {
			/* shift over multiple equals: same idea as
			 * for FIND_FIRST, but walking forward towards
			 * `end' */
			for (diff = (end - cur) >> 1; diff; diff >>= 1) {
				while (cur + diff < end &&
				       atom_EQ(BUNtail(bi, cur + diff),
					       v, b->ttype))
					cur += diff;
			}
		}
		/* on a match, report the position just past it */
		cur += (cmp == 0);
		break;
	default: /* case FIND_ANY */
		if (cmp) {
			/* not found */
			cur = BUN_NONE;
		}
		break;
	}

	return cur;
}
/*
 * Estimate a relative cost for joining the tail of l with the head of r.
 *
 * cntxt - unused.
 * l, r  - the join operands; only their counts and the key / dense /
 *         ordered properties of l's tail and r's head are inspected.
 * flag  - when non-zero, the "(reversed-)" strategies are also taken
 *         into account (the conditions mark flag == 0 as a leftjoin,
 *         for which they are skipped).
 *
 * The estimate is computed in two steps: first an upper bound on the
 * result size is derived from the key properties, saturating at
 * BUN_MAX; then that bound is divided by a rank between 7 (cheapest
 * access pattern) and 1 (most expensive), selected from the physical
 * properties of the operands.
 *
 * Returns the resulting cost; it is also printed to stderr when
 * ALGODEBUG is active.
 */
static BUN
ALGjoinCost(Client cntxt, BAT *l, BAT *r, int flag)
{
	BUN lc, rc;
	BUN cost;

	(void) cntxt;

	lc = BATcount(l);
	rc = BATcount(r);

	/* first use logical properties to estimate upper bound of result size */
	if (l->tkey && r->hkey)
		cost = MIN(lc, rc);
	else if (l->tkey)
		cost = rc;
	else if (r->hkey)
		cost = lc;
	else if (rc != 0 && lc > BUN_MAX / rc)
		/* lc * rc would exceed BUN_MAX: saturate.  The test is
		 * done by division because computing the product first
		 * can overflow the BUN type and then compare as small */
		cost = BUN_MAX;
	else
		cost = lc * rc;

	/* then use physical properties to rank costs */
	if (BATtdense(l) && BAThdense(r))
		/* densefetchjoin -> sequential access */
		cost /= 7;
	else if (BATtordered(l) && BAThdense(r))
		/* orderedfetchjoin -> sequential access */
		cost /= 6;
	else if (BATtdense(l) && BAThordered(r) && flag != 0 /* no leftjoin */)
		/* (reversed-) orderedfetchjoin -> sequential access */
		cost /= 6;
	else if (BAThdense(r) && rc <= SMALL_OPERAND)
		/* fetchjoin with random access in L1 */
		cost /= 5;
	else if (BATtdense(l) && lc <= SMALL_OPERAND && flag != 0 /* no leftjoin */)
		/* (reversed-) fetchjoin with random access in L1 */
		cost /= 5;
	else if (BATtordered(l) && BAThordered(r))
		/* mergejoin -> sequential access */
		cost /= 4;
	else if (BAThordered(r) && rc <= SMALL_OPERAND)
		/* binary-lookup-join with random access in L1 */
		cost /= 3;
	else if (BATtordered(l) && lc <= SMALL_OPERAND && flag != 0 /* no leftjoin */)
		/* (reversed-) binary-lookup-join with random access in L1 */
		cost /= 3;
	else if ((BAThordered(r) && lc <= SMALL_OPERAND) ||
		 (BATtordered(l) && rc <= SMALL_OPERAND))
		/* sortmergejoin with sorting in L1 */
		cost /= 3;
	else if (rc <= SMALL_OPERAND)
		/* hashjoin with hashtable in L1 */
		cost /= 3;
	else if (lc <= SMALL_OPERAND && flag != 0 /* no leftjoin */)
		/* (reversed-) hashjoin with hashtable in L1 */
		cost /= 3;
	else if (BAThdense(r))
		/* fetchjoin with random access beyond L1 */
		cost /= 2;
	else if (BATtdense(l) && flag != 0 /* no leftjoin */)
		/* (reversed-) fetchjoin with random access beyond L1 */
		cost /= 2;
	else
		/* hashjoin with hashtable larger than L1 */
		/* sortmergejoin with sorting beyond L1 */
		/* rank 1: division kept only to make the rank explicit */
		cost /= 1;

	ALGODEBUG fprintf(stderr,"#batjoin cost ?"BUNFMT"\n",cost);
	return cost;
}
/*
 * Append one property to the two result BATs of CMDinfo: first the name
 * to `keys', then the (freshly evaluated) value string to `vals'.  The
 * value expression is evaluated only after the name has been appended.
 */
#define CMDINFO_APPEND(keys, vals, k, v)			\
	do {							\
		BUNappend((keys), (k), FALSE);			\
		BUNappend((vals), (v), FALSE);			\
	} while (0)

/*
 * Produce a textual dump of the descriptor of BAT b as two parallel
 * string BATs with a void head starting at 0.
 *
 * ret1 - receives the BAT holding the property names.
 * ret2 - receives the BAT holding the matching property values.
 * b    - the BAT being described.
 *
 * Returns GDK_FAIL if either result BAT cannot be created (the first
 * one is reclaimed when the second fails), GDK_SUCCEED otherwise.
 * Both output pointers are assigned before the BATs are filled.
 * NOTE(review): the results of the individual BUNappend calls are not
 * checked; only the final assert compares the two counts.
 */
static gdk_return
CMDinfo(BAT **ret1, BAT **ret2, BAT *b)
{
	BAT *keys, *vals;
	const char *persistence, *access;

	keys = BATnew(TYPE_void, TYPE_str, 128, TRANSIENT);
	if (keys == NULL)
		return GDK_FAIL;
	vals = BATnew(TYPE_void, TYPE_str, 128, TRANSIENT);
	if (vals == NULL) {
		BBPreclaim(keys);
		return GDK_FAIL;
	}
	BATseqbase(keys, 0);
	BATseqbase(vals, 0);
	*ret1 = keys;
	*ret2 = vals;

	/* human-readable persistence mode */
	persistence = b->batPersistence == PERSISTENT ? "persistent" :
		      b->batPersistence == TRANSIENT ? "transient" :
		      "unknown";

	/* human-readable access mode */
	if (b->batRestricted == BAT_READ)
		access = "read-only";
	else if (b->batRestricted == BAT_WRITE)
		access = "updatable";
	else if (b->batRestricted == BAT_APPEND)
		access = "append-only";
	else
		access = "unknown";

	/* identity, sizes and types */
	CMDINFO_APPEND(keys, vals, "batId", BATgetId(b));
	CMDINFO_APPEND(keys, vals, "batCacheid", local_itoa((ssize_t)(b->batCacheid)));
	CMDINFO_APPEND(keys, vals, "hparentid", local_itoa((ssize_t)(b->H->heap.parentid)));
	CMDINFO_APPEND(keys, vals, "tparentid", local_itoa((ssize_t)(b->T->heap.parentid)));
	CMDINFO_APPEND(keys, vals, "batSharecnt", local_itoa((ssize_t)(b->batSharecnt)));
	CMDINFO_APPEND(keys, vals, "batCount", local_utoa((size_t)b->batCount));
	CMDINFO_APPEND(keys, vals, "batCapacity", local_utoa((size_t)b->batCapacity));
	CMDINFO_APPEND(keys, vals, "head", ATOMname(b->htype));
	CMDINFO_APPEND(keys, vals, "tail", ATOMname(b->ttype));
	CMDINFO_APPEND(keys, vals, "batPersistence", persistence);
	CMDINFO_APPEND(keys, vals, "batRestricted", access);
	CMDINFO_APPEND(keys, vals, "batRefcnt", local_itoa((ssize_t)(BBP_refs(b->batCacheid))));
	CMDINFO_APPEND(keys, vals, "batLRefcnt", local_itoa((ssize_t)(BBP_lrefs(b->batCacheid))));
	CMDINFO_APPEND(keys, vals, "batDirty", BATdirty(b) ? "dirty" : "clean");

	/* head column properties */
	CMDINFO_APPEND(keys, vals, "hsorted", local_itoa((ssize_t)BAThordered(b)));
	CMDINFO_APPEND(keys, vals, "hrevsorted", local_itoa((ssize_t)BAThrevordered(b)));
	CMDINFO_APPEND(keys, vals, "hident", b->hident);
	CMDINFO_APPEND(keys, vals, "hdense", local_itoa((ssize_t)(BAThdense(b))));
	CMDINFO_APPEND(keys, vals, "hseqbase", oidtostr(b->hseqbase));
	CMDINFO_APPEND(keys, vals, "hkey", local_itoa((ssize_t)(b->hkey)));
	CMDINFO_APPEND(keys, vals, "hvarsized", local_itoa((ssize_t)(b->hvarsized)));
	CMDINFO_APPEND(keys, vals, "halign", local_utoa(b->halign));
	CMDINFO_APPEND(keys, vals, "hnosorted", local_utoa(b->H->nosorted));
	CMDINFO_APPEND(keys, vals, "hnorevsorted", local_utoa(b->H->norevsorted));
	CMDINFO_APPEND(keys, vals, "hnodense", local_utoa(b->H->nodense));
	CMDINFO_APPEND(keys, vals, "hnokey[0]", local_utoa(b->H->nokey[0]));
	CMDINFO_APPEND(keys, vals, "hnokey[1]", local_utoa(b->H->nokey[1]));
	CMDINFO_APPEND(keys, vals, "hnonil", local_utoa(b->H->nonil));
	CMDINFO_APPEND(keys, vals, "hnil", local_utoa(b->H->nil));

	/* tail column properties */
	CMDINFO_APPEND(keys, vals, "tident", b->tident);
	CMDINFO_APPEND(keys, vals, "tdense", local_itoa((ssize_t)(BATtdense(b))));
	CMDINFO_APPEND(keys, vals, "tseqbase", oidtostr(b->tseqbase));
	CMDINFO_APPEND(keys, vals, "tsorted", local_itoa((ssize_t)BATtordered(b)));
	CMDINFO_APPEND(keys, vals, "trevsorted", local_itoa((ssize_t)BATtrevordered(b)));
	CMDINFO_APPEND(keys, vals, "tkey", local_itoa((ssize_t)(b->tkey)));
	CMDINFO_APPEND(keys, vals, "tvarsized", local_itoa((ssize_t)(b->tvarsized)));
	CMDINFO_APPEND(keys, vals, "talign", local_utoa(b->talign));
	CMDINFO_APPEND(keys, vals, "tnosorted", local_utoa(b->T->nosorted));
	CMDINFO_APPEND(keys, vals, "tnorevsorted", local_utoa(b->T->norevsorted));
	CMDINFO_APPEND(keys, vals, "tnodense", local_utoa(b->T->nodense));
	CMDINFO_APPEND(keys, vals, "tnokey[0]", local_utoa(b->T->nokey[0]));
	CMDINFO_APPEND(keys, vals, "tnokey[1]", local_utoa(b->T->nokey[1]));
	CMDINFO_APPEND(keys, vals, "tnonil", local_utoa(b->T->nonil));
	CMDINFO_APPEND(keys, vals, "tnil", local_utoa(b->T->nil));

	/* bookkeeping counters and stamps */
	CMDINFO_APPEND(keys, vals, "batInserted", local_utoa(b->batInserted));
	CMDINFO_APPEND(keys, vals, "batDeleted", local_utoa(b->batDeleted));
	CMDINFO_APPEND(keys, vals, "batFirst", local_utoa(b->batFirst));
	CMDINFO_APPEND(keys, vals, "htop", local_utoa(b->H->heap.free));
	CMDINFO_APPEND(keys, vals, "ttop", local_utoa(b->T->heap.free));
	CMDINFO_APPEND(keys, vals, "batStamp", local_itoa((ssize_t)(b->batStamp)));
	CMDINFO_APPEND(keys, vals, "lastUsed", local_itoa((ssize_t)(BBP_lastused(b->batCacheid))));
	CMDINFO_APPEND(keys, vals, "curStamp", local_itoa((ssize_t)(BBPcurstamp())));
	CMDINFO_APPEND(keys, vals, "batCopiedtodisk", local_itoa((ssize_t)(b->batCopiedtodisk)));
	CMDINFO_APPEND(keys, vals, "batDirtydesc", b->batDirtydesc ? "dirty" : "clean");

	/* heaps: dirty flags followed by the per-heap details */
	CMDINFO_APPEND(keys, vals, "H->heap.dirty", b->H->heap.dirty ? "dirty" : "clean");
	CMDINFO_APPEND(keys, vals, "T->heap.dirty", b->T->heap.dirty ? "dirty" : "clean");
	infoHeap(keys, vals, &b->H->heap, "head.");
	infoHeap(keys, vals, &b->T->heap, "tail.");

	/* the vheap pointers may be NULL; a missing vheap reports "clean" */
	CMDINFO_APPEND(keys, vals, "H->vheap->dirty", (b->H->vheap && b->H->vheap->dirty) ? "dirty" : "clean");
	infoHeap(keys, vals, b->H->vheap, "hheap.");
	CMDINFO_APPEND(keys, vals, "T->vheap->dirty", (b->T->vheap && b->T->vheap->dirty) ? "dirty" : "clean");
	infoHeap(keys, vals, b->T->vheap, "theap.");

	/* dump index information */
	if (b->H->hash) {
		HASHinfo(keys, vals, b->H->hash, "hhash->");
	}
	if (b->T->hash) {
		HASHinfo(keys, vals, b->T->hash, "thash->");
	}

	/* every name must have received exactly one value */
	assert(BATcount(keys) == BATcount(vals));
	return GDK_SUCCEED;
}

#undef CMDINFO_APPEND