Esempio n. 1
0
/*
 * grouped aggregates
 *
 * Common driver for the grouped aggregate wrappers.  It normalises the
 * inputs, invokes exactly one of the three aggregate callbacks and
 * stores the id(s) of the resulting BAT(s) in the return slots.
 *
 * retval1      receives the id of the aggregate result
 * retval2      if non-NULL, receives the id of the second result
 *              produced by grpfunc2 (requires grpfunc2)
 * b, g, e      values, group ids and group extents; this function
 *              calls BBPreleaseref on each of them (or on their
 *              replacements) on every exit path
 * tp           result type handed to the callback; TYPE_any is
 *              replaced by b's tail type for BATgroupmedian and
 *              BATgroupquantile
 * grpfunc1, grpfunc2, quantilefunc
 *              exactly one of these must be non-NULL
 * quantile     only used with quantilefunc: its first tail value (a
 *              dbl) is the requested quantile and must lie in [0,1];
 *              it is not released here
 * skip_nils    passed through to the callback unchanged
 * malfunc      name used in generated exceptions
 *
 * Returns MAL_SUCCEED, or an exception created with createException.
 *
 * NOTE(review): the results of the intermediate BATjoin / BATmark /
 * BATslice / BATleftjoin / BATproject calls are not checked for NULL.
 */
static str
AGGRgrouped(bat *retval1, bat *retval2, BAT *b, BAT *g, BAT *e, int tp,
			BAT *(*grpfunc1)(BAT *, BAT *, BAT *, BAT *, int, int, int),
			gdk_return (*grpfunc2)(BAT **, BAT **, BAT *, BAT *, BAT *, BAT *, int, int, int),
			BAT *(*quantilefunc)(BAT *, BAT *, BAT *, BAT *, int, double, int, int),
			BAT *quantile,
			int skip_nils,
			const char *malfunc)
{
	/* bn starts out NULL so that, should the assertions below be
	 * compiled out and no callback be supplied, we end up in the
	 * regular failure path instead of reading an uninitialized
	 * pointer */
	BAT *bn = NULL, *cnts = NULL, *t, *map;
	double qvalue;

	/* exactly one of grpfunc1, grpfunc2 and quantilefunc is non-NULL
	 * and the others are NULL */
	assert((grpfunc1 != NULL && grpfunc2 == NULL && quantilefunc == NULL) ||
			(grpfunc1 == NULL && grpfunc2 != NULL && quantilefunc == NULL) ||
			(grpfunc1 == NULL && grpfunc2 == NULL && quantilefunc != NULL) );
	/* if retval2 is non-NULL, we must have grpfunc2 */
	assert(retval2 == NULL || grpfunc2 != NULL);
	/* if quantile is non-NULL, we must have quantilefunc */
	assert(quantile == NULL || quantilefunc != NULL);

	if (b == NULL || g == NULL || e == NULL) {
		/* release whichever inputs we did get before failing */
		if (b)
			BBPreleaseref(b->batCacheid);
		if (g)
			BBPreleaseref(g->batCacheid);
		if (e)
			BBPreleaseref(e->batCacheid);
		throw(MAL, malfunc, RUNTIME_OBJECT_MISSING);
	}
	if (tp == TYPE_any && (grpfunc1 == BATgroupmedian || quantilefunc == BATgroupquantile))
		tp = b->ttype;
	if (!BAThdense(b) || !BAThdense(g)) {
		/* if b or g don't have a dense head, replace the head with a
		 * dense sequence */
		t = BATjoin(BATmirror(b), g, MIN(BATcount(b), BATcount(g)));
		BBPreleaseref(b->batCacheid);
		BBPreleaseref(g->batCacheid);
		b = BATmirror(BATmark(t, 0));
		g = BATmirror(BATmark(BATmirror(t), 0));
		BBPreleaseref(t->batCacheid);
	}
	if (b->hseqbase != g->hseqbase || BATcount(b) != BATcount(g)) {
		/* b and g are not aligned: align them by creating a view on
		 * one or the other */
		oid min;				/* lowest common oid */
		oid max;				/* highest common oid */
		min = b->hseqbase;
		if (min < g->hseqbase)
			min = g->hseqbase;
		max = b->hseqbase + BATcount(b);
		if (g->hseqbase + BATcount(g) < max)
			max = g->hseqbase + BATcount(g);
		if (b->hseqbase != min || b->hseqbase + BATcount(b) != max) {
			/* no overlap: take an empty slice at the start of b */
			if (min >= max)
				min = max = b->hseqbase;
			t = BATslice(b, BUNfirst(b) + (BUN) (min - b->hseqbase),
						 BUNfirst(b) + (BUN) (max - b->hseqbase));
			BBPreleaseref(b->batCacheid);
			b = t;
		}
		if (g->hseqbase != min || g->hseqbase + BATcount(g) != max) {
			/* no overlap: take an empty slice at the start of g */
			if (min >= max)
				min = max = g->hseqbase;
			t = BATslice(g, BUNfirst(g) + (BUN) (min - g->hseqbase),
						 BUNfirst(g) + (BUN) (max - g->hseqbase));
			BBPreleaseref(g->batCacheid);
			g = t;
		}
	}
	if (!BAThdense(e)) {
		/* if e doesn't have a dense head, renumber the group ids with
		 * a dense sequence at the cost of some left joins */
		map = BATmark(e, 0);	/* [gid,newgid(dense)] */
		BBPreleaseref(e->batCacheid);
		e = BATmirror(map);		/* [newgid(dense),gid] */
		t = BATleftjoin(g, map, BATcount(g)); /* [oid,newgid] */
		BBPreleaseref(g->batCacheid);
		g = t;
	} else {
		map = NULL;
	}
	if (grpfunc1)
		bn = (*grpfunc1)(b, g, e, NULL, tp, skip_nils, 1);
	if (quantilefunc) {
		assert(BATcount(quantile)>0);
		assert(quantile->ttype == TYPE_dbl);
		qvalue = ((const double *)Tloc(quantile, BUNfirst(quantile)))[0];
		if (qvalue < 0 || qvalue > 1) {
			/* release everything we still hold, just like the other
			 * exit paths do; when map is set, e is merely its mirror
			 * and the original e has already been released */
			BBPreleaseref(b->batCacheid);
			BBPreleaseref(g->batCacheid);
			if (map)
				BBPreleaseref(map->batCacheid);
			else
				BBPreleaseref(e->batCacheid);
			return createException(MAL, malfunc, "quantile value of %f is not in range [0,1]", qvalue);
		}
		bn = (*quantilefunc)(b, g, e, NULL, tp, qvalue, skip_nils, 1);
	}
	/* only ask grpfunc2 for its second result if the caller wants it */
	if (grpfunc2 && (*grpfunc2)(&bn, retval2 ? &cnts : NULL, b, g, e, NULL, tp, skip_nils, 1) == GDK_FAIL)
		bn = NULL;
	if (bn != NULL && (grpfunc1 == BATgroupmin || grpfunc1 == BATgroupmax)) {
		/* NOTE(review): presumably min/max yield references into b
		 * rather than values, hence the projection -- confirm */
		t = BATproject(bn, b);
		BBPreleaseref(bn->batCacheid);
		bn = t;
	}
	BBPreleaseref(b->batCacheid);
	BBPreleaseref(g->batCacheid);
	if (map == NULL)			/* if map!=NULL, e is mirror of map */
		BBPreleaseref(e->batCacheid);
	if (bn == NULL) {
		char *errbuf = GDKerrbuf;
		char *s;

		if (map)
			BBPreleaseref(map->batCacheid);

		if (errbuf && *errbuf) {
			/* turn the pending GDK error text into the exception
			 * message: drop a leading "!ERROR: " and then, unless
			 * there is a '!' at offset 5 of what remains, also drop
			 * everything up to and including the first ": " */
			if (strncmp(errbuf, "!ERROR: ", 8) == 0)
				errbuf += 8;
			if (strchr(errbuf, '!') == errbuf + 5) {
				s = createException(MAL, malfunc, "%s", errbuf);
			} else if ((s = strchr(errbuf, ':')) != NULL && s[1] == ' ') {
				s = createException(MAL, malfunc, "%s", s + 2);
			} else {
				s = createException(MAL, malfunc, "%s", errbuf);
			}
			/* the message has been consumed: clear the error buffer */
			*GDKerrbuf = 0;
			return s;
		}
		throw(MAL, malfunc, OPERATION_FAILED);
	}
	if (map) {
		/* join the results with map so that they are expressed in
		 * terms of the original (non-dense) group ids again */
		t = BATleftjoin(map, bn, BATcount(bn));
		BBPreleaseref(bn->batCacheid);
		bn = t;
		if (cnts) {
			t = BATleftjoin(map, cnts, BATcount(cnts));
			BBPreleaseref(cnts->batCacheid);
			cnts = t;
		}
		BBPreleaseref(map->batCacheid);
	}
	*retval1 = bn->batCacheid;
	BBPkeepref(bn->batCacheid);
	if (retval2) {
		*retval2 = cnts->batCacheid;
		BBPkeepref(cnts->batCacheid);
	}
	return MAL_SUCCEED;
}
Esempio n. 2
0
static BAT *
ALGjoinPathBody(Client cntxt, int top, BAT **joins, int flag)
{
	BAT *b = NULL;
	BUN estimate, e = 0;
	int i, j, k;
	int *postpone= (int*) GDKzalloc(sizeof(int) *top);
	int postponed=0;

	if(postpone == NULL){
		GDKerror("joinPathBody" MAL_MALLOC_FAIL);
		return NULL;
	}


	/* solve the join by pairing the smallest first */
	while (top > 1) {
		j = 0;
		estimate = ALGjoinCost(cntxt,joins[0],joins[1],flag);
		ALGODEBUG
			fprintf(stderr,"#joinPath estimate join(%d,%d) %d cnt="BUNFMT" %s\n", joins[0]->batCacheid, 
				joins[1]->batCacheid,(int)estimate, BATcount(joins[0]), postpone[0]?"postpone":"");
		for (i = 1; i < top - 1; i++) {
			e = ALGjoinCost(cntxt,joins[i], joins[i + 1],flag);
			ALGODEBUG
				fprintf(stderr,"#joinPath estimate join(%d,%d) %d cnt="BUNFMT" %s\n", joins[i]->batCacheid, 
					joins[i+1]->batCacheid,(int)e,BATcount(joins[i]),  postpone[i]?"postpone":"");
			if (e < estimate &&  ( !(postpone[i] && postpone[i+1]) || postponed<top)) {
				estimate = e;
				j = i;
			}
		}
		/*
		 * BEWARE. you may not use a size estimation, because it
		 * may fire a BATproperty check in a few cases.
		 * In case a join fails, we may try another order first before
		 * abandoning the task. It can handle cases where a Cartesian product emerges.
		 *
		 * A left-join sequence only requires the result to be sorted
		 * against the first operand. For all others operand pairs, the cheapest join suffice.
		 */

		switch(flag){
		case 0:
			if ( j == 0) {
				b = BATleftjoin(joins[j], joins[j + 1], BATcount(joins[j]));
				ALGODEBUG{
					fprintf(stderr,"#joinpath step produces "BUNFMT"\n", BATcount(b));
				}
				break;
			}
		case 1:
			b = BATjoin(joins[j], joins[j + 1], (BATcount(joins[j]) < BATcount(joins[j + 1])? BATcount(joins[j]):BATcount(joins[ j + 1])));
			break;
		case 3:
			b = BATproject(joins[j], joins[j + 1]);
			ALGODEBUG{
				fprintf(stderr,"#joinpath step produces "BUNFMT"\n", BATcount(b));
			}
			break;
		}
		if (b==NULL){
			if ( postpone[j] && postpone[j+1]){
				for( --top; top>=0; top--)
					BBPunfix(joins[top]->batCacheid);
				GDKfree(postpone);
				return NULL;
			}
			postpone[j] = TRUE;
			postpone[j+1] = TRUE;
			postponed = 0;
			for( k=0; k<top; k++)
				postponed += postpone[k]== TRUE;
			if ( postponed == top){
				for( --top; top>=0; top--)
					BBPunfix(joins[top]->batCacheid);
				GDKfree(postpone);
				return NULL;
			}
			/* clear the GDKerrors and retry */
			if( cntxt->errbuf )
				cntxt->errbuf[0]=0;
			continue;
		} else {
			/* reset the postponed joins */
			for( k=0; k<top; k++)
				postpone[k]=FALSE;
			if (!(b->batDirty&2)) BATsetaccess(b, BAT_READ);
			postponed = 0;
		}
		ALGODEBUG{
			if (b ) {
				fprintf(stderr, "#joinPath %d:= join(%d,%d)"
				" arguments %d (cnt= "BUNFMT") against (cnt "BUNFMT") cost "BUNFMT"\n", 
					b->batCacheid, joins[j]->batCacheid, joins[j + 1]->batCacheid,
					j, BATcount(joins[j]),  BATcount(joins[j+1]), e);
			}
		}

		if ( b == 0 ){
			for( --top; top>=0; top--)
				BBPunfix(joins[top]->batCacheid);
			GDKfree(postpone);
			return 0;
		}
		BBPunfix(joins[j]->batCacheid);
		BBPunfix(joins[j+1]->batCacheid);
		joins[j] = b;
		top--;
		for (i = j + 1; i < top; i++)
			joins[i] = joins[i + 1];
	}
Esempio n. 3
0
/*
 * grouped aggregates
 *
 * Common driver for the grouped aggregate wrappers: normalises the
 * value BAT b, the group-id BAT g and the extents BAT e, runs grpfunc
 * on them and stores the id of the resulting BAT in *retval.
 *
 * tp, skip_nils are handed to grpfunc (TYPE_any becomes b's tail type
 *               for BATgroupmedian)
 * malfunc       name used in generated exceptions
 *
 * b, g and e (or the BATs that replace them along the way) are passed
 * to BBPreleaseref before the function returns.
 *
 * Returns MAL_SUCCEED, or an exception created with createException.
 */
static str
AGGRgrouped(bat *retval, BAT *b, BAT *g, BAT *e, int tp,
			BAT *(*grpfunc)(BAT *, BAT *, BAT *, BAT *, int, int, int),
			int skip_nils,
			const char *malfunc)
{
	BAT *res, *tmp;
	BAT *renum = NULL;			/* set only when e has to be renumbered */

	/* all three inputs are mandatory; drop the ones we did receive */
	if (b == NULL || g == NULL || e == NULL) {
		if (b != NULL)
			BBPreleaseref(b->batCacheid);
		if (g != NULL)
			BBPreleaseref(g->batCacheid);
		if (e != NULL)
			BBPreleaseref(e->batCacheid);
		throw(MAL, malfunc, RUNTIME_OBJECT_MISSING);
	}

	if (grpfunc == BATgroupmedian && tp == TYPE_any)
		tp = b->ttype;

	/* step 1: make sure both b and g carry a dense head */
	if (!(BAThdense(b) && BAThdense(g))) {
		tmp = BATjoin(BATmirror(b), g, MIN(BATcount(b), BATcount(g)));
		BBPreleaseref(b->batCacheid);
		BBPreleaseref(g->batCacheid);
		b = BATmirror(BATmark(tmp, 0));
		g = BATmirror(BATmark(BATmirror(tmp), 0));
		BBPreleaseref(tmp->batCacheid);
	}

	/* step 2: restrict b and g to the oid range they have in common */
	if (b->hseqbase != g->hseqbase || BATcount(b) != BATcount(g)) {
		/* lo: lowest common oid; hi: one past the highest common oid */
		oid lo = b->hseqbase < g->hseqbase ? g->hseqbase : b->hseqbase;
		oid hi = b->hseqbase + BATcount(b);

		if (g->hseqbase + BATcount(g) < hi)
			hi = g->hseqbase + BATcount(g);

		if (b->hseqbase != lo || b->hseqbase + BATcount(b) != hi) {
			if (lo >= hi) {
				/* nothing in common: empty slice at the start of b */
				lo = b->hseqbase;
				hi = lo;
			}
			tmp = BATslice(b,
						   BUNfirst(b) + (BUN) (lo - b->hseqbase),
						   BUNfirst(b) + (BUN) (hi - b->hseqbase));
			BBPreleaseref(b->batCacheid);
			b = tmp;
		}
		if (g->hseqbase != lo || g->hseqbase + BATcount(g) != hi) {
			if (lo >= hi) {
				/* nothing in common: empty slice at the start of g */
				lo = g->hseqbase;
				hi = lo;
			}
			tmp = BATslice(g,
						   BUNfirst(g) + (BUN) (lo - g->hseqbase),
						   BUNfirst(g) + (BUN) (hi - g->hseqbase));
			BBPreleaseref(g->batCacheid);
			g = tmp;
		}
	}

	/* step 3: a non-dense e gets its group ids renumbered densely, at
	 * the price of a left join here and another one on the result */
	if (!BAThdense(e)) {
		renum = BATmark(e, 0);					/* [gid,newgid(dense)] */
		BBPreleaseref(e->batCacheid);
		e = BATmirror(renum);					/* [newgid(dense),gid] */
		tmp = BATleftjoin(g, renum, BATcount(g));	/* [oid,newgid] */
		BBPreleaseref(g->batCacheid);
		g = tmp;
	}

	/* step 4: the aggregation proper */
	res = grpfunc(b, g, e, NULL, tp, skip_nils, 1);
	if (res != NULL && (grpfunc == BATgroupmin || grpfunc == BATgroupmax)) {
		/* min/max results are joined against b once more */
		tmp = BATouterjoin(res, b, BATcount(res));
		BBPreleaseref(res->batCacheid);
		res = tmp;
	}

	/* the (possibly replaced) inputs are no longer needed; when renum
	 * is set, e is just its mirror and goes away together with it */
	BBPreleaseref(b->batCacheid);
	BBPreleaseref(g->batCacheid);
	if (renum == NULL)
		BBPreleaseref(e->batCacheid);

	if (res == NULL) {
		char *errbuf = GDKerrbuf;
		char *colon;
		char *exc;
		const char *msg;

		if (renum != NULL)
			BBPreleaseref(renum->batCacheid);

		if (errbuf == NULL || *errbuf == 0)
			throw(MAL, malfunc, OPERATION_FAILED);

		/* derive the exception text from the pending GDK error */
		if (strncmp(errbuf, "!ERROR: ", 8) == 0)
			errbuf += 8;
		msg = errbuf;
		if (strchr(errbuf, '!') != errbuf + 5) {
			/* no '!' at offset 5: skip past the first ": ", if any */
			colon = strchr(errbuf, ':');
			if (colon != NULL && colon[1] == ' ')
				msg = colon + 2;
		}
		exc = createException(MAL, malfunc, "%s", msg);
		/* message consumed: reset the error buffer */
		*GDKerrbuf = 0;
		return exc;
	}

	/* step 5: undo the renumbering of step 3 on the result */
	if (renum != NULL) {
		tmp = BATleftjoin(renum, res, BATcount(res));
		BBPreleaseref(renum->batCacheid);
		BBPreleaseref(res->batCacheid);
		res = tmp;
	}

	*retval = res->batCacheid;
	BBPkeepref(res->batCacheid);
	return MAL_SUCCEED;
}