/*
 * Build an AGGRtask describing the input columns of a grouping instruction.
 *
 * Every argument from pci->retc up to pci->argc is read as a BAT id, its
 * descriptor is fetched, and a rough distinct-value estimate is stored in
 * task->unique[]: the number of unique values found in a sample of at most
 * 1000 tuples.  Columns flagged tsorted get the estimate 1000 regardless of
 * the sample.  task->last ends up as the number of collected columns and
 * task->size as the tuple count of the last column visited.
 *
 * Returns NULL when an allocation fails or a BAT descriptor cannot be
 * obtained; descriptors fetched so far are unfixed and all memory is freed
 * in that case.
 */
static AGGRtask *
GROUPcollect(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
{
	AGGRtask *task;
	BAT *col, *smp, *uniq = NULL;
	BUN nsample;
	int arg;

	(void) mb;
	(void) cntxt;

	task = (AGGRtask *) GDKzalloc(sizeof(*task));
	if (task == NULL)
		return NULL;

	/* all per-column arrays are sized by the full argument count */
	task->bid = (bat *) GDKzalloc(pci->argc * sizeof(bat));
	task->cols = (BAT **) GDKzalloc(pci->argc * sizeof(BAT *));
	task->unique = (BUN *) GDKzalloc(pci->argc * sizeof(BUN));
	if (task->cols == NULL || task->bid == NULL || task->unique == NULL) {
		if (task->cols)
			GDKfree(task->cols);
		if (task->bid)
			GDKfree(task->bid);
		if (task->unique)
			GDKfree(task->unique);
		GDKfree(task);
		return NULL;
	}

	for (arg = pci->retc; arg < pci->argc; arg++, task->last++) {
		task->bid[task->last] = *getArgReference_bat(stk, pci, arg);
		col = BATdescriptor(task->bid[task->last]);
		task->cols[task->last] = col;
		if (col == NULL) {
			/* unwind: release every descriptor obtained before this one */
			while (--task->last >= 0)
				BBPunfix(task->cols[task->last]->batCacheid);
			GDKfree(task->cols);
			GDKfree(task->bid);
			GDKfree(task->unique);
			GDKfree(task);
			return NULL;
		}

		/* sample at most 1000 tuples to estimate the number of distinct values */
		if (BATcount(col) < 1000)
			nsample = BATcount(col);
		else
			nsample = 1000;
		smp = BATsample(col, nsample);
		if (smp) {
			uniq = BATunique(col, smp);
			if (uniq) {
				task->unique[task->last] = BATcount(uniq);
				BBPunfix(uniq->batCacheid);
			}
			BBPunfix(smp->batCacheid);
		}
		if (col->tsorted)
			task->unique[task->last] = 1000;	/* sorting helps grouping */
		task->size = BATcount(col);
	}

#ifdef _DEBUG_GROUPBY_
	for (arg = 0; arg < task->last; arg++)
		fprintf(stderr,"#group %d unique "BUNFMT "\n", arg, task->unique[arg]);
#endif
	return task;
}
/*
 * sample.uniform: draw a sample from a BAT.
 *
 * r  receives the id of the resulting sample BAT (kept via BBPkeepref).
 * b  points to the id of the input BAT.
 * s  points to a BUN that is passed to BATsample as its second argument.
 *
 * Returns MAL_SUCCEED, or a MAL exception when the input BAT cannot be
 * accessed or BATsample fails.  The reference taken on the input BAT is
 * released on every path, including the failure path.
 */
str
SAMPLEuniform(bat *r, bat *b, ptr s)
{
	BAT *br, *bb;

	if ((bb = BATdescriptor(*b)) == NULL) {
		throw(MAL, "sample.uniform", INTERNAL_BAT_ACCESS);
	}
	br = BATsample(bb, *(BUN *) s);
	/* the input is not used past this point; release it before the
	 * failure check so an error does not leak the reference */
	BBPunfix(bb->batCacheid);
	if (br == NULL)
		throw(MAL, "sample.uniform", OPERATION_FAILED);

	BBPkeepref(*r = br->batCacheid);
	return MAL_SUCCEED;
}
/*
 * Heuristic cost estimate for joining the tail of l with the head of r.
 *
 * The estimate starts from an upper bound on the result size derived from
 * the key properties of both operands, clamped to BUN_MAX, and is then
 * divided by a factor that ranks the join algorithm the physical
 * properties (dense / ordered / small operand) would allow.  A larger
 * divisor means a cheaper access pattern.
 *
 * cntxt  unused.
 * l, r   the join operands.
 * flag   going by the inline notes, 0 denotes a leftjoin, for which the
 *        "reversed" strategies are not considered.
 *
 * Returns the ranked cost; with ALGODEBUG enabled it is also printed to
 * stderr.
 */
static BUN
ALGjoinCost(Client cntxt, BAT *l, BAT *r, int flag)
{
	BUN lc, rc;
	BUN cost = 0;

	(void) cntxt;
	lc = BATcount(l);
	rc = BATcount(r);

	/* first use logical properties to estimate upper bound of result size */
	if (l->tkey && r->hkey)
		cost = MIN(lc, rc);
	else if (l->tkey)
		cost = rc;
	else if (r->hkey)
		cost = lc;
	else if (rc != 0 && lc > BUN_MAX / rc)
		/* lc * rc would exceed BUN_MAX; test by division because the
		 * product itself can wrap around and look deceptively small */
		cost = BUN_MAX;
	else
		cost = lc * rc;

	/* then use physical properties to rank costs */
	if (BATtdense(l) && BAThdense(r))
		/* densefetchjoin -> sequential access */
		cost /= 7;
	else if (BATtordered(l) && BAThdense(r))
		/* orderedfetchjoin > sequential access */
		cost /= 6;
	else if (BATtdense(l) && BAThordered(r) && flag != 0 /* no leftjoin */)
		/* (reversed-) orderedfetchjoin -> sequential access */
		cost /= 6;
	else if (BAThdense(r) && rc <= SMALL_OPERAND)
		/* fetchjoin with random access in L1 */
		cost /= 5;
	else if (BATtdense(l) && lc <= SMALL_OPERAND && flag != 0 /* no leftjoin */)
		/* (reversed-) fetchjoin with random access in L1 */
		cost /= 5;
	else if (BATtordered(l) && BAThordered(r))
		/* mergejoin > sequential access */
		cost /= 4;
	else if (BAThordered(r) && rc <= SMALL_OPERAND)
		/* binary-lookup-join with random access in L1 */
		cost /= 3;
	else if (BATtordered(l) && lc <= SMALL_OPERAND && flag != 0 /* no leftjoin */)
		/* (reversed-) binary-lookup-join with random access in L1 */
		cost /= 3;
	else if ((BAThordered(r) && lc <= SMALL_OPERAND) ||
		 (BATtordered(l) && rc <= SMALL_OPERAND))
		/* sortmergejoin with sorting in L1 */
		cost /= 3;
	else if (rc <= SMALL_OPERAND)
		/* hashjoin with hashtable in L1 */
		cost /= 3;
	else if (lc <= SMALL_OPERAND && flag != 0 /* no leftjoin */)
		/* (reversed-) hashjoin with hashtable in L1 */
		cost /= 3;
	else if (BAThdense(r))
		/* fetchjoin with random access beyond L1 */
		cost /= 2;
	else if (BATtdense(l) && flag != 0 /* no leftjoin */)
		/* (reversed-) fetchjoin with random access beyond L1 */
		cost /= 2;
	else
		/* hashjoin with hashtable larger than L1 */
		/* sortmergejoin with sorting beyond L1 */
		cost /= 1;	/* no discount; kept to make the ranking explicit */

	ALGODEBUG fprintf(stderr,"#batjoin cost ?"BUNFMT"\n",cost);
	return cost;
}
/*
 * Group on several columns at once.
 *
 * Arguments 0..2 of the instruction are the result BAT ids (grp, ext,
 * hist); arguments 3..argc-1 are the BAT ids of the columns to group on.
 *
 * The columns are first ordered by an estimate of their number of distinct
 * values (taken from a sample, falling back to the tuple count), smallest
 * first.  The first column is grouped with GRPsubgroup1 and every further
 * column refines the result through GRPsubgroup4.  Refinement stops early
 * once the histogram has as many entries as there are tuples, since no
 * further split is possible then.
 *
 * Returns MAL_SUCCEED, the exception produced by a failing sub-grouping
 * step, or an allocation-failure exception when the scratch arrays cannot
 * be obtained.
 */
str
GRPmulticolumngroup(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
{
	bat *grp = (bat *) getArgReference(stk, pci, 0);
	bat *ext = (bat *) getArgReference(stk, pci, 1);
	bat *hist = (bat *) getArgReference(stk, pci, 2);
	int i, j;
	bat oldgrp, oldext, oldhist;
	str msg = MAL_SUCCEED;
	lng *sizes, l;
	bat *bid, bi;
	BAT *b, *sample, *uniq;
	BUN count = 0;

	(void) cntxt;
	(void) mb;
	assert(pci->argc >= 4);

	/* scratch arrays indexed by argument position; slots 0..2 stay unused */
	sizes = (lng *) GDKzalloc(sizeof(lng) * pci->argc);
	bid = (bat *) GDKzalloc(sizeof(bat) * pci->argc);
	if (sizes == NULL || bid == NULL) {
		if (sizes)
			GDKfree(sizes);
		if (bid)
			GDKfree(bid);
		throw(MAL, "group.multicolumngroup", MAL_MALLOC_FAIL);
	}

	for (i = 3; i < pci->argc; i++) {
		bid[i] = *(int *) getArgReference(stk, pci, i);
		b = BATdescriptor(bid[i]);
		if (b) {
			/* default estimate is the tuple count; refine it with the
			 * number of unique values in a sample when one is available */
			sizes[i] = count = BATcount(b);
			sample = BATsample(b, 1000);
			if (sample) {
				uniq = BATkunique(BATmirror(sample));
				if (uniq) {
					sizes[i] = (lng) BATcount(uniq);
					BBPreleaseref(uniq->batCacheid);
				}
				BBPreleaseref(sample->batCacheid);
			}
			BBPreleaseref(bid[i]);
		}
	}

	/* sort order may have influences */
	/* SF100 Q16 showed < ordering is 2 times faster as > ordering */
	for (i = 3; i < pci->argc; i++)
		for (j = i + 1; j < pci->argc; j++)
			if (sizes[j] < sizes[i]) {
				l = sizes[j];
				sizes[j] = sizes[i];
				sizes[i] = l;
				bi = bid[j];
				bid[j] = bid[i];
				bid[i] = bi;
			}

	/* (grp,ext,hist) := group.subgroup(..) */
	*grp = 0;
	*ext = 0;
	*hist = 0;
	msg = GRPsubgroup1(grp, ext, hist, &bid[3]);
	i = 4;
	if (msg == MAL_SUCCEED && pci->argc > 4)
		do {
			/* early break when there are as many groups as histogram entries */
			b = BATdescriptor(*hist);
			if (b) {
				j = BATcount(b) == count;
				BBPreleaseref(*hist);
				if (j)
					break;
			}

			/* (grp,ext,hist) := group.subgroup(arg,grp,ext,hist) */
			oldgrp = *grp;
			oldext = *ext;
			oldhist = *hist;
			*grp = 0;
			*ext = 0;
			*hist = 0;
			msg = GRPsubgroup4(grp, ext, hist, &bid[i], &oldgrp, &oldext, &oldhist);
			/* the previous round's results are intermediates; drop them */
			BBPdecref(oldgrp, TRUE);
			BBPdecref(oldext, TRUE);
			BBPdecref(oldhist, TRUE);
		} while (msg == MAL_SUCCEED && ++i < pci->argc);

	GDKfree(sizes);
	GDKfree(bid);
	return msg;
}