예제 #1
0
파일: groupby.c 프로젝트: MonetDB/MonetDB
/*
 * Build the aggregation task describing the input columns of a group-by
 * instruction.
 *
 * Every argument of pci from position retc up to argc is treated as a BAT
 * id.  For each one the id is recorded in bid[], a descriptor is obtained
 * and stored in cols[], and unique[] receives a cardinality hint: the count
 * of the BAT returned by BATunique for a sample of at most 1000 rows, or
 * the fixed value 1000 when the column is flagged tsorted.  `last` ends up
 * as the number of columns collected and `size` as the row count of the
 * last column seen.
 *
 * Returns the new task, or NULL when an allocation fails or a BAT
 * descriptor cannot be obtained; in both cases everything acquired so far
 * is released first.  On success the descriptors in cols[] are still held
 * when the function returns (presumably released by whoever disposes of
 * the task -- verify against the caller).
 */
static AGGRtask*
GROUPcollect( Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci){
	AGGRtask *task;
	int arg;

	(void) cntxt;
	(void) mb;

	task = (AGGRtask *) GDKzalloc(sizeof(*task));
	if (task == NULL)
		return NULL;

	task->bid = (bat*) GDKzalloc(pci->argc * sizeof(bat));
	task->cols = (BAT**) GDKzalloc(pci->argc * sizeof(BAT*));
	task->unique = (BUN *) GDKzalloc(pci->argc * sizeof(BUN));
	if (task->bid == NULL || task->cols == NULL || task->unique == NULL) {
		/* drop whichever of the three arrays did get allocated */
		if (task->unique != NULL)
			GDKfree(task->unique);
		if (task->bid != NULL)
			GDKfree(task->bid);
		if (task->cols != NULL)
			GDKfree(task->cols);
		GDKfree(task);
		return NULL;
	}

	arg = pci->retc;
	while (arg < pci->argc) {
		BAT *col, *smp, *uniq;
		BUN nsample;

		task->bid[task->last] = *getArgReference_bat(stk, pci, arg);
		col = BATdescriptor(task->bid[task->last]);
		task->cols[task->last] = col;
		if (col == NULL) {
			/* unwind the descriptors collected for earlier columns */
			while (--task->last >= 0)
				BBPunfix(task->cols[task->last]->batCacheid);
			GDKfree(task->cols);
			GDKfree(task->bid);
			GDKfree(task->unique);
			GDKfree(task);
			return NULL;
		}

		/* sample at most 1000 rows of the column */
		nsample = BATcount(col);
		if (nsample >= 1000)
			nsample = 1000;
		smp = BATsample(col, nsample);
		if (smp != NULL) {
			uniq = BATunique(col, smp);
			if (uniq != NULL) {
				task->unique[task->last] = BATcount(uniq);
				BBPunfix(uniq->batCacheid);
			}
			BBPunfix(smp->batCacheid);
		}
		/* a sorted column overrides the sampled estimate */
		if (col->tsorted)
			task->unique[task->last] = 1000; /* sorting helps grouping */
		task->size = BATcount(col);

		arg++;
		task->last++;
	}

#ifdef _DEBUG_GROUPBY_
	for (arg = 0; arg < task->last; arg++)
		fprintf(stderr,"#group %d unique "BUNFMT "\n", arg, task->unique[arg]);
#endif
	return task;
}
예제 #2
0
파일: sample.c 프로젝트: lajus/monetinr
/*
 * MAL wrapper: draw a sample from a BAT.
 *
 * r  - output: receives the id of the sample BAT
 * b  - input:  id of the BAT to sample from
 * s  - input:  points to a BUN holding the requested sample size
 *
 * Returns MAL_SUCCEED on success.  Throws INTERNAL_BAT_ACCESS when the
 * input BAT cannot be obtained and OPERATION_FAILED when BATsample
 * returns NULL.
 */
str
SAMPLEuniform(bat *r, bat *b, ptr s) {
	BAT *br, *bb;

	if ((bb = BATdescriptor(*b)) == NULL) {
		throw(MAL, "sample.uniform", INTERNAL_BAT_ACCESS);
	}
	br = BATsample(bb,*(BUN *)s);
	/* The input is not used after sampling; release it before the failure
	 * check so that the error path does not leak its reference. */
	BBPunfix(bb->batCacheid);
	if (br == NULL)
		throw(MAL, "sample.uniform", OPERATION_FAILED);

	BBPkeepref(*r = br->batCacheid);
	return MAL_SUCCEED;
}
예제 #3
0
/*
 * Heuristic cost estimate for joining l with r.
 *
 * The estimate starts from an upper bound on the result size derived from
 * the key properties of the join columns (tail of l, head of r), and is
 * then divided by a rank factor chosen from the physical properties of the
 * operands (dense, ordered, small enough for SMALL_OPERAND).  A larger
 * divisor means a cheaper join algorithm is expected to apply.
 *
 * cntxt - unused
 * l, r  - join operands
 * flag  - per the inline notes, 0 appears to denote a left join, for which
 *         the "reversed" algorithm variants are not considered
 *
 * Returns the estimated cost, capped at BUN_MAX.
 */
static BUN
ALGjoinCost(Client cntxt, BAT *l, BAT *r, int flag)
{
	BUN lc, rc;
	BUN cost;

	(void) cntxt;
	lc = BATcount(l);
	rc = BATcount(r);

	/* first use logical properties to estimate upper bound of result size */
	if (l->tkey && r->hkey)
		cost = MIN(lc,rc);
	else if (l->tkey)
		cost = rc;
	else if (r->hkey)
		cost = lc;
	else if (lc != 0 && rc > BUN_MAX / lc)
		/* lc * rc would exceed BUN_MAX; test by division because the
		 * unsigned product wraps around instead of saturating */
		cost = BUN_MAX;
	else
		cost = lc * rc;

	/* then use physical properties to rank costs */
	if (BATtdense(l) && BAThdense(r))
		/* densefetchjoin -> sequential access */
		cost /= 7;
	else if (BATtordered(l) && BAThdense(r))
		/* orderedfetchjoin > sequential access */
		cost /= 6;
	else if (BATtdense(l) && BAThordered(r) && flag != 0 /* no leftjoin */)
		/* (reversed-) orderedfetchjoin -> sequential access */
		cost /= 6;
	else if (BAThdense(r) && rc <= SMALL_OPERAND)
		/* fetchjoin with random access in L1 */
		cost /= 5;
	else if (BATtdense(l) && lc <= SMALL_OPERAND && flag != 0 /* no leftjoin */)
		/* (reversed-) fetchjoin with random access in L1 */
		cost /= 5;
	else if (BATtordered(l) && BAThordered(r))
		/* mergejoin > sequential access */
		cost /= 4;
	else if (BAThordered(r) && rc <= SMALL_OPERAND)
		/* binary-lookup-join with random access in L1 */
		cost /= 3;
	else if (BATtordered(l) && lc <= SMALL_OPERAND && flag != 0 /* no leftjoin */)
		/* (reversed-) binary-lookup-join with random access in L1 */
		cost /= 3;
	else if ((BAThordered(r) && lc <= SMALL_OPERAND) || (BATtordered(l) && rc <= SMALL_OPERAND))
		/* sortmergejoin with sorting in L1 */
		cost /= 3;
	else if (rc <= SMALL_OPERAND)
		/* hashjoin with hashtable in L1 */
		cost /= 3;
	else if (lc <= SMALL_OPERAND && flag != 0 /* no leftjoin */)
		/* (reversed-) hashjoin with hashtable in L1 */
		cost /= 3;
	else if (BAThdense(r))
		/* fetchjoin with random access beyond L1 */
		cost /= 2;
	else if (BATtdense(l) && flag != 0 /* no leftjoin */)
		/* (reversed-) fetchjoin with random access beyond L1 */
		cost /= 2;
	/* otherwise: hashjoin with hashtable larger than L1, or sortmergejoin
	 * with sorting beyond L1 -- the size estimate is used unscaled */

	ALGODEBUG
		fprintf(stderr,"#batjoin cost ?"BUNFMT"\n",cost);
	return cost;
}
예제 #4
0
/*
 * Group on several columns in one call:
 *   (grp, ext, hist) := group over arguments 3 .. argc-1
 *
 * Arguments 0..2 of pci are the result slots (grp, ext, hist); arguments
 * 3 and up are the ids of the columns to group on.  The first column is
 * grouped with GRPsubgroup1 and every further column refines the previous
 * result through GRPsubgroup4.
 *
 * As a heuristic the columns are processed in ascending order of an
 * estimated number of distinct values, obtained from a sample of 1000
 * rows per column (the plain row count is used when sampling fails).
 * Refinement stops early once the histogram has as many entries as the
 * input has rows, since every row is then already its own group.
 *
 * If the scratch arrays for the ranking cannot be allocated, the ranking
 * is skipped and the columns are grouped in argument order; this changes
 * the processing order but not which columns are grouped.
 * NOTE(review): if failing fast is preferred on allocation failure, raise
 * a MAL exception here instead.
 *
 * cntxt, mb - unused
 * Returns MAL_SUCCEED or the error returned by GRPsubgroup1/GRPsubgroup4.
 */
str
GRPmulticolumngroup(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
{
	bat *grp = (bat *) getArgReference(stk, pci, 0);
	bat *ext = (bat *) getArgReference(stk, pci, 1);
	bat *hist = (bat *) getArgReference(stk, pci, 2);
	int i, j;
	int ranked;
	bat oldgrp, oldext, oldhist;
	str msg = MAL_SUCCEED;
	lng *sizes, l;
	bat *bid, bi;
	bat *col;
	BAT *b, *sample, *uniq;
	BUN count = 0;

	(void) cntxt;
	(void) mb;
	assert(pci->argc >= 4);

	sizes = (lng *) GDKzalloc(sizeof(lng) * pci->argc);
	bid = (bat *) GDKzalloc(sizeof(bat) * pci->argc);
	/* the ranking needs both scratch arrays; without them fall back to
	 * plain argument order instead of dereferencing NULL */
	ranked = (sizes != NULL && bid != NULL);

	if (ranked) {
		/* estimate the number of distinct values per column */
		for (i = 3; i < pci->argc; i++) {
			bid[i] = *(bat *) getArgReference(stk, pci, i);
			b = BATdescriptor(bid[i]);
			if (b) {
				sizes[i] = count = BATcount(b);
				sample = BATsample(b, 1000);
				if (sample) {
					uniq = BATkunique(BATmirror(sample));
					if (uniq) {
						sizes[i] = (lng) BATcount(uniq);
						BBPreleaseref(uniq->batCacheid);
					}
					BBPreleaseref(sample->batCacheid);
				}
				BBPreleaseref(bid[i]);
			}
		}

		/* sort order may have influences */
		/* SF100 Q16 showed < ordering is 2 times faster as > ordering */
		for (i = 3; i < pci->argc; i++)
			for (j = i + 1; j < pci->argc; j++)
				if (sizes[j] < sizes[i]) {
					l = sizes[j];
					sizes[j] = sizes[i];
					sizes[i] = l;
					bi = bid[j];
					bid[j] = bid[i];
					bid[i] = bi;
				}
	}
	/* When not ranked, count stays 0, so the early break below can only
	 * trigger for an empty histogram. */

	/* (grp,ext,hist) := group.subgroup(..) */
	*grp = 0;
	*ext = 0;
	*hist = 0;
	col = ranked ? &bid[3] : (bat *) getArgReference(stk, pci, 3);
	msg = GRPsubgroup1(grp, ext, hist, col);
	i = 4;
	if (msg == MAL_SUCCEED && pci->argc > 4) {
		do {
			/* early break when there are as many groups as histogram entries */
			b = BATdescriptor(*hist);
			if (b) {
				j = BATcount(b) == count;
				BBPreleaseref(*hist);
				if (j)
					break;
			}

			/* (grp,ext,hist) := group.subgroup(arg,grp,ext,hist) */
			oldgrp = *grp;
			oldext = *ext;
			oldhist = *hist;
			*grp = 0;
			*ext = 0;
			*hist = 0;
			col = ranked ? &bid[i] : (bat *) getArgReference(stk, pci, i);
			msg = GRPsubgroup4(grp, ext, hist, col, &oldgrp, &oldext, &oldhist);
			/* the previous intermediate results are no longer needed */
			BBPdecref(oldgrp, TRUE);
			BBPdecref(oldext, TRUE);
			BBPdecref(oldhist, TRUE);
		} while (msg == MAL_SUCCEED && ++i < pci->argc);
	}
	if (sizes)
		GDKfree(sizes);
	if (bid)
		GDKfree(bid);
	return msg;
}