Esempio n. 1
0
/*
 * mat.pack: concatenate all BAT arguments of the instruction into one
 * freshly created BAT.  Oid synchronisation between the pieces is the
 * responsibility of the code generators.
 *
 * The operation runs in two passes over the arguments p[1..argc-1]:
 *  1. inspect the descriptors only, to pick the tail type and to add
 *     up the counts so the result can be allocated in one go (the
 *     result may become really large);
 *  2. fetch every piece and append it to the result.
 *
 * Contrary to most operations, arguments for which no BAT can be
 * obtained are skipped and do not produce RUNTIME_OBJECT_MISSING.
 * When no argument yields a BAT at all, the result id is set to 0.
 *
 * Returns MAL_SUCCEED, or a MAL exception if the result BAT cannot be
 * allocated.
 */
static str
MATpackInternal(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p)
{
	int *ret = (int*) getArgReference(stk,p,0);
	int argno;
	int tt = TYPE_any;
	BUN total = 0;
	BAT *piece, *bn;

	(void) cntxt;
	(void) mb;

	/* pass 1: determine tail type and total size from the descriptors */
	for (argno = 1; argno < p->argc; argno++) {
		int bid = stk->stk[getArg(p,argno)].val.ival;

		piece = BBPquickdesc(abs(bid),FALSE);
		if (piece && bid < 0)
			piece = BATmirror(piece);
		if (piece == NULL)
			continue;

		assert(BAThdense(piece));
		/* take the type of the first piece; a zero type chosen
		 * earlier is replaced by the type of a later piece */
		if (tt == TYPE_any || (!tt && tt != piece->ttype))
			tt = piece->ttype;
		total += BATcount(piece);
	}

	/* not a single usable argument: nothing to pack */
	if (tt == TYPE_any) {
		*ret = 0;
		return MAL_SUCCEED;
	}

	bn = BATnew(TYPE_void, tt, total, TRANSIENT);
	if (bn == NULL)
		throw(MAL, "mat.pack", MAL_MALLOC_FAIL);

	/* pass 2: append the pieces in argument order */
	for (argno = 1; argno < p->argc; argno++) {
		piece = BATdescriptor(stk->stk[getArg(p,argno)].val.ival);
		if (piece == NULL)
			continue;

		if (BATcount(bn) == 0) {
			/* while the result is still empty it inherits
			 * both sequence bases from the current piece */
			BATseqbase(bn, piece->H->seq);
			BATseqbase(BATmirror(bn), piece->T->seq);
		}
		BATappend(bn,piece,FALSE);
		BBPunfix(piece->batCacheid);
	}

	assert(!bn->H->nil || !bn->H->nonil);
	assert(!bn->T->nil || !bn->T->nonil);
	BATsettrivprop(bn);
	BATderiveProps(bn,FALSE);

	*ret = bn->batCacheid;
	BBPkeepref(*ret);
	return MAL_SUCCEED;
}
Esempio n. 2
0
/*
 * Turn the void (virtual, dense) head column of b into a real oid
 * column by allocating a head heap and filling it with the sequence
 * hseqbase, hseqbase+1, ...
 *
 * Returns GDK_SUCCEED when the head was materialized or when there was
 * nothing to do (head not dense or not of type void), and GDK_FAIL when
 * the new head heap cannot be allocated; in that case the original head
 * heap descriptor is put back.  b must not be a view (asserted).
 */
gdk_return
BATmaterializeh(BAT *b)
{
	int ht;
	BUN cnt;
	Heap head;
	BUN p, q;
	oid h, *x;
	bte tshift;

	/* NOTE(review): BATcheck presumably returns GDK_FAIL for a NULL b;
	 * the name reported is "BATmaterialize", not "BATmaterializeh" */
	BATcheck(b, "BATmaterialize", GDK_FAIL);
	assert(!isVIEW(b));
	ht = b->htype;
	cnt = BATcapacity(b);
	/* keep a copy of the current head heap descriptor: it is restored
	 * on allocation failure and freed once the new heap is in place */
	head = b->H->heap;
	p = BUNfirst(b);
	q = BUNlast(b);
	assert(cnt >= q - p);
	ALGODEBUG fprintf(stderr, "#BATmaterialize(%d);\n", (int) b->batCacheid);

	if (!BAThdense(b) || ht != TYPE_void) {
		/* no voids */
		return GDK_SUCCEED;
	}
	ht = TYPE_oid;

	/* cleanup possible ACC's */
	HASHdestroy(b);
	IMPSdestroy(b);

	/* allocate room for cnt (the capacity) oids in a fresh head heap */
	b->H->heap.filename = NULL;
	if (HEAPalloc(&b->H->heap, cnt, sizeof(oid)) != GDK_SUCCEED) {
		/* undo: reinstate the saved heap descriptor */
		b->H->heap = head;
		return GDK_FAIL;
	}

	/* point of no return */
	b->htype = ht;
	/* BATsetdims may recompute the tail dimensions; remember the tail
	 * shift so it can be put back afterwards */
	tshift = b->T->shift;
	BATsetdims(b);
	if (b->ttype) {
		b->T->shift = tshift;	/* restore in case it got changed */
		b->T->width = 1 << tshift;
	}
	b->batDirty = TRUE;
	b->batDirtydesc = TRUE;
	b->H->heap.dirty = TRUE;

	/* set the correct dense info */
	b->hdense = TRUE;

	/* So now generate [h..h+cnt-1] */
	h = b->hseqbase;
	x = (oid *) b->H->heap.base;
	for (; p < q; p++)
		*x++ = h++;
	/* the number of oids written becomes the BAT count */
	cnt = h - b->hseqbase;
	BATsetcount(b, cnt);

	/* cleanup the old heaps */
	HEAPfree(&head, 0);
	return GDK_SUCCEED;
}
Esempio n. 3
0
/*
 * BATsample implements sampling for void headed BATs.
 *
 * Given a BAT b with a dense head and a requested sample size n, a new
 * transient BAT with a void head starting at 0 is returned:
 *  - n == 0:        an empty void/void BAT;
 *  - n >= count(b): a void/void BAT of count(b) entries whose tail
 *                   sequence starts at the head sequence base of b,
 *                   i.e. all oids of b;
 *  - otherwise:     n distinct oids drawn at random from the head
 *                   range of b, stored in the tail.
 *
 * When more than half of the oids are requested, the oids to be left
 * out are drawn instead and the complement is emitted (the "antiset"),
 * which keeps the number of random draws at or below count(b)/2.
 *
 * Returns NULL on error.
 */
BAT *
BATsample(BAT *b, BUN n)
{
	BAT *bn;
	BUN cnt, wanted, drawn;
	struct oidtreenode *tree;
	oid minoid, maxoid;
	bit antiset;
	int atmostone;

	BATcheck(b, "BATsample", NULL);
	assert(BAThdense(b));
	ERRORcheck(n > BUN_MAX, "BATsample: sample size larger than BUN_MAX\n", NULL);
	ALGODEBUG
		fprintf(stderr, "#BATsample: sample " BUNFMT " elements.\n", n);

	cnt = BATcount(b);

	if (n == 0) {
		/* empty sample size */
		bn = BATnew(TYPE_void, TYPE_void, 0, TRANSIENT);
		if (bn == NULL) {
			GDKerror("BATsample: memory allocation error");
			return NULL;
		}
		BATsetcount(bn, 0);
		BATseqbase(bn, 0);
		BATseqbase(BATmirror(bn), 0);
		return bn;
	}

	if (cnt <= n) {
		/* sample size is at least the size of the input BAT:
		 * return all oids */
		bn = BATnew(TYPE_void, TYPE_void, cnt, TRANSIENT);
		if (bn == NULL) {
			GDKerror("BATsample: memory allocation error");
			return NULL;
		}
		BATsetcount(bn, cnt);
		BATseqbase(bn, 0);
		BATseqbase(BATmirror(bn), b->H->seq);
		return bn;
	}

	/* proper sample: 0 < n < cnt */
	minoid = b->hseqbase;
	maxoid = b->hseqbase + cnt;
	wanted = n;
	/* if someone samples more than half of our oids, draw the ones
	 * to be excluded instead */
	antiset = n > cnt / 2;
	if (antiset)
		n = cnt - n;

	tree = GDKmalloc(n * sizeof(struct oidtreenode));
	if (tree == NULL) {
		GDKerror("#BATsample: memory allocation error");
		return NULL;
	}
	bn = BATnew(TYPE_void, TYPE_oid, wanted, TRANSIENT);
	if (bn == NULL) {
		GDKfree(tree);
		GDKerror("#BATsample: memory allocation error");
		return NULL;
	}

	/* keep drawing until n distinct oids are in the tree; a draw
	 * that was already generated is rejected and simply retried */
	drawn = 0;
	while (drawn < n) {
		oid candoid = (oid) (minoid + DRAND * (maxoid - minoid));

		if (OIDTreeMaybeInsert(tree, candoid, drawn))
			drawn++;
	}

	if (antiset)
		OIDTreeToBATAntiset(tree, bn, minoid, maxoid);
	else
		OIDTreeToBAT(tree, bn);
	GDKfree(tree);

	BATsetcount(bn, wanted);
	atmostone = bn->batCount <= 1;

	/* tail: sorted, unique oids */
	bn->tsorted = 1;
	bn->trevsorted = atmostone;
	bn->tkey = 1;
	bn->tdense = atmostone;
	if (bn->batCount == 1)
		bn->tseqbase = *(oid *) Tloc(bn, BUNfirst(bn));

	/* head: dense sequence starting at 0 */
	bn->hdense = 1;
	bn->hseqbase = 0;
	bn->hkey = 1;
	bn->hsorted = 1;
	bn->hrevsorted = atmostone;

	return bn;
}
Esempio n. 4
0
/*
 * Estimate a cost figure for joining l (on its tail) with r (on its
 * head).
 *
 * First an upper bound of the result size is derived from the key
 * properties of the two join columns, saturating at BUN_MAX.  That
 * bound is then divided by a rank (7 = cheapest ... 1 = most expensive)
 * chosen from the physical properties of the operands, so that cheaper
 * join algorithms yield a lower cost.
 *
 * cntxt is unused.  flag == 0 denotes a left join, for which the
 * "reversed" algorithm variants are not considered.
 *
 * Returns the estimated cost.
 */
static BUN
ALGjoinCost(Client cntxt, BAT *l, BAT *r, int flag)
{
	BUN lc, rc;
	BUN cost;

	(void) cntxt;
	lc = BATcount(l);
	rc = BATcount(r);

	/* first use logical properties to estimate upper bound of result size */
	if (l->tkey && r->hkey)
		cost = MIN(lc,rc);
	else if (l->tkey)
		cost = rc;
	else if (r->hkey)
		cost = lc;
	else if (rc != 0 && lc > BUN_MAX / rc)
		/* lc * rc would exceed BUN_MAX; test by division because
		 * the unsigned product itself silently wraps around */
		cost = BUN_MAX;
	else
		cost = lc * rc;

	/* then use physical properties to rank costs */
	if (BATtdense(l) && BAThdense(r))
		/* densefetchjoin -> sequential access */
		cost /= 7;
	else if (BATtordered(l) && BAThdense(r))
		/* orderedfetchjoin > sequential access */
		cost /= 6;
	else if (BATtdense(l) && BAThordered(r) && flag != 0 /* no leftjoin */)
		/* (reversed-) orderedfetchjoin -> sequential access */
		cost /= 6;
	else if (BAThdense(r) && rc <= SMALL_OPERAND)
		/* fetchjoin with random access in L1 */
		cost /= 5;
	else if (BATtdense(l) && lc <= SMALL_OPERAND && flag != 0 /* no leftjoin */)
		/* (reversed-) fetchjoin with random access in L1 */
		cost /= 5;
	else if (BATtordered(l) && BAThordered(r))
		/* mergejoin > sequential access */
		cost /= 4;
	else if (BAThordered(r) && rc <= SMALL_OPERAND)
		/* binary-lookup-join with random access in L1 */
		cost /= 3;
	else if (BATtordered(l) && lc <= SMALL_OPERAND && flag != 0 /* no leftjoin */)
		/* (reversed-) binary-lookup-join with random access in L1 */
		cost /= 3;
	else if ((BAThordered(r) && lc <= SMALL_OPERAND) || (BATtordered(l) && rc <= SMALL_OPERAND))
		/* sortmergejoin with sorting in L1 */
		cost /= 3;
	else if (rc <= SMALL_OPERAND)
		/* hashjoin with hashtable in L1 */
		cost /= 3;
	else if (lc <= SMALL_OPERAND && flag != 0 /* no leftjoin */)
		/* (reversed-) hashjoin with hashtable in L1 */
		cost /= 3;
	else if (BAThdense(r))
		/* fetchjoin with random access beyond L1 */
		cost /= 2;
	else if (BATtdense(l) && flag != 0 /* no leftjoin */)
		/* (reversed-) fetchjoin with random access beyond L1 */
		cost /= 2;
	/* otherwise: hashjoin with hashtable larger than L1, or
	 * sortmergejoin with sorting beyond L1; the bound is used as is */

	ALGODEBUG
		fprintf(stderr,"#batjoin cost ?"BUNFMT"\n",cost);
	return cost;
}
Esempio n. 5
0
/*
 * Common driver for the grouped aggregates that take BAT ids, an
 * optional candidate list and, optionally, a quantile.
 *
 * Exactly one of grpfunc1, grpfunc2 and quantilefunc must be non-NULL:
 *  - grpfunc1 produces a single result BAT;
 *  - grpfunc2 produces the result BAT and, when retval2 is non-NULL, a
 *    second BAT whose id is stored in *retval2;
 *  - quantilefunc produces a single result BAT from the quantile value
 *    read from the first tail value of the BAT *quantile, which must be
 *    in the range [0,1].
 *
 * gid, eid, sid and quantile may be NULL when the corresponding BAT is
 * not supplied.  Without a candidate list (sid == NULL) a non-dense
 * head of the input BAT is replaced by a dense one.
 *
 * Every BAT descriptor obtained here is released again on all paths,
 * including the error paths.
 *
 * Returns MAL_SUCCEED with the result id in *retval1, or a MAL
 * exception carrying malfunc as the function name.
 */
static str
AGGRsubgroupedExt(bat *retval1, bat *retval2, bat *bid, bat *gid, bat *eid, bat *sid,
			   int skip_nils, int abort_on_error, int tp,
			   BAT *(*grpfunc1)(BAT *, BAT *, BAT *, BAT *, int, int, int),
			   gdk_return (*grpfunc2)(BAT **, BAT **, BAT *, BAT *, BAT *, BAT *, int, int, int),
			   BAT *(*quantilefunc)(BAT *, BAT *, BAT *, BAT *, int, double, int, int),
			   bat *quantile,
			   const char *malfunc)
{
	BAT *b, *g, *e, *s = NULL, *bn = NULL, *cnts = NULL, *q = NULL;
	double qvalue;

	/* one of grpfunc1, grpfunc2 and quantilefunc is non-NULL and the
	 * others are NULL */
	assert((grpfunc1 && grpfunc2 == NULL && quantilefunc == NULL) ||
			(grpfunc1 == NULL && grpfunc2 && quantilefunc == NULL) ||
			(grpfunc1 == NULL && grpfunc2 == NULL && quantilefunc) );

	/* if retval2 is non-NULL, we must have grpfunc2 */
	assert(retval2 == NULL || grpfunc2 != NULL);

	b = BATdescriptor(*bid);
	g = gid ? BATdescriptor(*gid) : NULL;
	e = eid ? BATdescriptor(*eid) : NULL;
	q = quantile ? BATdescriptor(*quantile) : NULL;

	/* the quantile BAT is mandatory whenever quantilefunc is used: it
	 * is dereferenced below */
	if (b == NULL || (gid != NULL && g == NULL) || (eid != NULL && e == NULL) ||
		(quantilefunc != NULL && q == NULL)) {
		if (b)
			BBPreleaseref(b->batCacheid);
		if (g)
			BBPreleaseref(g->batCacheid);
		if (e)
			BBPreleaseref(e->batCacheid);
		if (q)
			BBPreleaseref(q->batCacheid);
		throw(MAL, malfunc, RUNTIME_OBJECT_MISSING);
	}
	if (tp == TYPE_any && (grpfunc1 == BATgroupmedian || quantilefunc == BATgroupquantile))
		tp = b->ttype;

	if (sid) {
		s = BATdescriptor(*sid);
		if (s == NULL) {
			BBPreleaseref(b->batCacheid);
			if (g)
				BBPreleaseref(g->batCacheid);
			if (e)
				BBPreleaseref(e->batCacheid);
			if (q)
				BBPreleaseref(q->batCacheid);
			throw(MAL, malfunc, RUNTIME_OBJECT_MISSING);
		}
	} else {
		if (!BAThdense(b)) {
			/* XXX backward compatibility code: ignore non-dense head, but
			 * only if no candidate list */
			s = BATmirror(BATmark(BATmirror(b), 0));
			BBPreleaseref(b->batCacheid);
			b = s;
		}
		s = NULL;
	}
	if (grpfunc1)
		bn = (*grpfunc1)(b, g, e, s, tp, skip_nils, abort_on_error);
	if (quantilefunc) {
		assert(BATcount(q)>0);
		assert(q->ttype == TYPE_dbl);
		qvalue = ((const double *)Tloc(q, BUNfirst(q)))[0];
		if (qvalue <  0|| qvalue > 1) {
			/* drop everything we hold before reporting the error */
			BBPreleaseref(b->batCacheid);
			if (g)
				BBPreleaseref(g->batCacheid);
			if (e)
				BBPreleaseref(e->batCacheid);
			if (s)
				BBPreleaseref(s->batCacheid);
			BBPreleaseref(q->batCacheid);
			return createException(MAL, malfunc, "quantile value of %f is not in range [0,1]", qvalue);
		}
		bn = (*quantilefunc)(b, g, e, s, tp, qvalue, skip_nils, abort_on_error);
	}
	if (grpfunc2 && (*grpfunc2)(&bn, retval2 ? &cnts : NULL, b, g, e, s, tp, skip_nils, abort_on_error) == GDK_FAIL)
		bn = NULL;

	BBPreleaseref(b->batCacheid);
	if (g)
		BBPreleaseref(g->batCacheid);
	if (e)
		BBPreleaseref(e->batCacheid);
	if (s)
		BBPreleaseref(s->batCacheid);
	if (q)
		BBPreleaseref(q->batCacheid);
	if (bn == NULL) {
		char *errbuf = GDKerrbuf;
		char *msg, *colon;

		if (errbuf && *errbuf) {
			/* turn the GDK error text into a MAL exception: skip
			 * the "!ERROR: " prefix; text that carries a '!' at
			 * offset 5 is passed on whole, otherwise only the part
			 * after the first ": " (if any) is used */
			if (strncmp(errbuf, "!ERROR: ", 8) == 0)
				errbuf += 8;
			if (strchr(errbuf, '!') == errbuf + 5) {
				msg = createException(MAL, malfunc, "%s", errbuf);
			} else if ((colon = strchr(errbuf, ':')) != NULL && colon[1] == ' ') {
				msg = createException(MAL, malfunc, "%s", colon + 2);
			} else {
				msg = createException(MAL, malfunc, "%s", errbuf);
			}
			*GDKerrbuf = 0;
			return msg;
		}
		throw(MAL, malfunc, OPERATION_FAILED);
	}
	*retval1 = bn->batCacheid;
	BBPkeepref(bn->batCacheid);
	if (retval2) {
		*retval2 = cnts->batCacheid;
		BBPkeepref(cnts->batCacheid);
	}
	return MAL_SUCCEED;
}
Esempio n. 6
0
/*
 * grouped aggregates
 *
 * Common driver working on already fetched BATs: b holds the values,
 * g the group id per value and e the groups.  Exactly one of grpfunc1,
 * grpfunc2 and quantilefunc must be non-NULL; retval2 requires
 * grpfunc2 and quantile requires quantilefunc.
 *
 * Before the aggregate is called the inputs are normalised:
 *  - if b or g lacks a dense head, both are rebuilt with a dense head
 *    from their join;
 *  - if b and g do not cover the same oid range, slices over the
 *    common range are taken;
 *  - if e lacks a dense head, the group ids are renumbered densely and
 *    the results are mapped back afterwards.
 *
 * The references to b, g and e are released by this function on every
 * path, including all error returns.  The quantile BAT is only read;
 * it is not released here.
 *
 * Returns MAL_SUCCEED with the result id in *retval1 (and, when
 * retval2 is non-NULL, the second result id in *retval2), or a MAL
 * exception carrying malfunc as the function name.
 */
static str
AGGRgrouped(bat *retval1, bat *retval2, BAT *b, BAT *g, BAT *e, int tp,
			BAT *(*grpfunc1)(BAT *, BAT *, BAT *, BAT *, int, int, int),
			gdk_return (*grpfunc2)(BAT **, BAT **, BAT *, BAT *, BAT *, BAT *, int, int, int),
			BAT *(*quantilefunc)(BAT *, BAT *, BAT *, BAT *, int, double, int, int),
			BAT *quantile,
			int skip_nils,
			const char *malfunc)
{
	BAT *bn = NULL, *cnts = NULL, *t, *map;
	double qvalue;

	/* one of grpfunc1, grpfunc2 and quantilefunc is non-NULL and the
	 * others are NULL */
	assert((grpfunc1 != NULL && grpfunc2 == NULL && quantilefunc == NULL) ||
			(grpfunc1 == NULL && grpfunc2 != NULL && quantilefunc == NULL) ||
			(grpfunc1 == NULL && grpfunc2 == NULL && quantilefunc != NULL) );
	/* if retval2 is non-NULL, we must have grpfunc2 */
	assert(retval2 == NULL || grpfunc2 != NULL);
	assert(quantile == NULL || quantilefunc != NULL);

	if (b == NULL || g == NULL || e == NULL) {
		if (b)
			BBPreleaseref(b->batCacheid);
		if (g)
			BBPreleaseref(g->batCacheid);
		if (e)
			BBPreleaseref(e->batCacheid);
		throw(MAL, malfunc, RUNTIME_OBJECT_MISSING);
	}
	if (tp == TYPE_any && (grpfunc1 == BATgroupmedian || quantilefunc == BATgroupquantile))
		tp = b->ttype;
	if (!BAThdense(b) || !BAThdense(g)) {
		/* if b or g don't have a dense head, replace the head with a
		 * dense sequence */
		t = BATjoin(BATmirror(b), g, MIN(BATcount(b), BATcount(g)));
		BBPreleaseref(b->batCacheid);
		BBPreleaseref(g->batCacheid);
		b = BATmirror(BATmark(t, 0));
		g = BATmirror(BATmark(BATmirror(t), 0));
		BBPreleaseref(t->batCacheid);
	}
	if (b->hseqbase != g->hseqbase || BATcount(b) != BATcount(g)) {
		/* b and g are not aligned: align them by creating a view on
		 * one or the other */
		oid min;				/* lowest common oid */
		oid max;				/* highest common oid */
		min = b->hseqbase;
		if (min < g->hseqbase)
			min = g->hseqbase;
		max = b->hseqbase + BATcount(b);
		if (g->hseqbase + BATcount(g) < max)
			max = g->hseqbase + BATcount(g);
		if (b->hseqbase != min || b->hseqbase + BATcount(b) != max) {
			/* no overlap at all: take an empty slice */
			if (min >= max)
				min = max = b->hseqbase;
			t = BATslice(b, BUNfirst(b) + (BUN) (min - b->hseqbase),
						 BUNfirst(b) + (BUN) (max - b->hseqbase));
			BBPreleaseref(b->batCacheid);
			b = t;
		}
		if (g->hseqbase != min || g->hseqbase + BATcount(g) != max) {
			if (min >= max)
				min = max = g->hseqbase;
			t = BATslice(g, BUNfirst(g) + (BUN) (min - g->hseqbase),
						 BUNfirst(g) + (BUN) (max - g->hseqbase));
			BBPreleaseref(g->batCacheid);
			g = t;
		}
	}
	if (!BAThdense(e)) {
		/* if e doesn't have a dense head, renumber the group ids with
		 * a dense sequence at the cost of some left joins */
		map = BATmark(e, 0);	/* [gid,newgid(dense)] */
		BBPreleaseref(e->batCacheid);
		e = BATmirror(map);		/* [newgid(dense),gid] */
		t = BATleftjoin(g, map, BATcount(g)); /* [oid,newgid] */
		BBPreleaseref(g->batCacheid);
		g = t;
	} else {
		map = NULL;
	}
	if (grpfunc1)
		bn = (*grpfunc1)(b, g, e, NULL, tp, skip_nils, 1);
	if (quantilefunc) {
		assert(BATcount(quantile)>0);
		assert(quantile->ttype == TYPE_dbl);
		qvalue = ((const double *)Tloc(quantile, BUNfirst(quantile)))[0];
		if (qvalue <  0|| qvalue > 1) {
			/* release what the regular exit path would release:
			 * b, g and either map or e (e is the mirror of map
			 * when map is set) */
			BBPreleaseref(b->batCacheid);
			BBPreleaseref(g->batCacheid);
			if (map)
				BBPreleaseref(map->batCacheid);
			else
				BBPreleaseref(e->batCacheid);
			return createException(MAL, malfunc, "quantile value of %f is not in range [0,1]", qvalue);
		}
		bn = (*quantilefunc)(b, g, e, NULL, tp, qvalue, skip_nils, 1);
	}
	if (grpfunc2 && (*grpfunc2)(&bn, retval2 ? &cnts : NULL, b, g, e, NULL, tp, skip_nils, 1) == GDK_FAIL)
		bn = NULL;
	if (bn != NULL && (grpfunc1 == BATgroupmin || grpfunc1 == BATgroupmax)) {
		/* for min/max the aggregate result is projected onto b */
		t = BATproject(bn, b);
		BBPreleaseref(bn->batCacheid);
		bn = t;
	}
	BBPreleaseref(b->batCacheid);
	BBPreleaseref(g->batCacheid);
	if (map == NULL)			/* if map!=NULL, e is mirror of map */
		BBPreleaseref(e->batCacheid);
	if (bn == NULL) {
		char *errbuf = GDKerrbuf;
		char *s;

		if (map)
			BBPreleaseref(map->batCacheid);

		if (errbuf && *errbuf) {
			/* turn the GDK error text into a MAL exception: skip
			 * the "!ERROR: " prefix; text that carries a '!' at
			 * offset 5 is passed on whole, otherwise only the part
			 * after the first ": " (if any) is used */
			if (strncmp(errbuf, "!ERROR: ", 8) == 0)
				errbuf += 8;
			if (strchr(errbuf, '!') == errbuf + 5) {
				s = createException(MAL, malfunc, "%s", errbuf);
			} else if ((s = strchr(errbuf, ':')) != NULL && s[1] == ' ') {
				s = createException(MAL, malfunc, "%s", s + 2);
			} else {
				s = createException(MAL, malfunc, "%s", errbuf);
			}
			*GDKerrbuf = 0;
			return s;
		}
		throw(MAL, malfunc, OPERATION_FAILED);
	}
	if (map) {
		/* translate the dense group numbering back to the original
		 * group ids */
		t = BATleftjoin(map, bn, BATcount(bn));
		BBPreleaseref(bn->batCacheid);
		bn = t;
		if (cnts) {
			t = BATleftjoin(map, cnts, BATcount(cnts));
			BBPreleaseref(cnts->batCacheid);
			cnts = t;
		}
		BBPreleaseref(map->batCacheid);
	}
	*retval1 = bn->batCacheid;
	BBPkeepref(bn->batCacheid);
	if (retval2) {
		*retval2 = cnts->batCacheid;
		BBPkeepref(cnts->batCacheid);
	}
	return MAL_SUCCEED;
}
Esempio n. 7
0
/*
 * Common driver for the grouped aggregates that take BAT ids and an
 * optional candidate list.
 *
 * The BATs behind bid, gid, eid and sid are fetched (gid, eid and sid
 * may be NULL when not supplied), grpfunc is applied, all inputs are
 * released again and the id of the result is stored in *retval.
 * Without a candidate list a non-dense head of the input is replaced
 * by a dense one.
 *
 * Returns MAL_SUCCEED, or a MAL exception carrying malfunc as the
 * function name when an input BAT is missing or grpfunc fails.
 */
static str
AGGRsubgrouped(bat *retval, bat *bid, bat *gid, bat *eid, bat *sid,
			   int skip_nils, int abort_on_error, int tp,
			   BAT *(*grpfunc)(BAT *, BAT *, BAT *, BAT *, int, int, int),
			   const char *malfunc)
{
	BAT *vals, *grps, *ext, *cand, *res;

	vals = BATdescriptor(*bid);
	grps = gid ? BATdescriptor(*gid) : NULL;
	ext = eid ? BATdescriptor(*eid) : NULL;
	if (vals == NULL || (gid != NULL && grps == NULL) || (eid != NULL && ext == NULL)) {
		if (vals)
			BBPreleaseref(vals->batCacheid);
		if (grps)
			BBPreleaseref(grps->batCacheid);
		if (ext)
			BBPreleaseref(ext->batCacheid);
		throw(MAL, malfunc, RUNTIME_OBJECT_MISSING);
	}
	if (tp == TYPE_any && grpfunc == BATgroupmedian)
		tp = vals->ttype;

	cand = NULL;
	if (sid != NULL) {
		cand = BATdescriptor(*sid);
		if (cand == NULL) {
			BBPreleaseref(vals->batCacheid);
			if (grps)
				BBPreleaseref(grps->batCacheid);
			if (ext)
				BBPreleaseref(ext->batCacheid);
			throw(MAL, malfunc, RUNTIME_OBJECT_MISSING);
		}
	} else if (!BAThdense(vals)) {
		/* XXX backward compatibility code: ignore non-dense head, but
		 * only if no candidate list */
		BAT *renumbered = BATmirror(BATmark(BATmirror(vals), 0));

		BBPreleaseref(vals->batCacheid);
		vals = renumbered;
	}

	res = (*grpfunc)(vals, grps, ext, cand, tp, skip_nils, abort_on_error);

	/* the inputs are no longer needed, whatever the outcome */
	BBPreleaseref(vals->batCacheid);
	if (grps)
		BBPreleaseref(grps->batCacheid);
	if (ext)
		BBPreleaseref(ext->batCacheid);
	if (cand)
		BBPreleaseref(cand->batCacheid);

	if (res != NULL) {
		*retval = res->batCacheid;
		BBPkeepref(res->batCacheid);
		return MAL_SUCCEED;
	}

	/* failure: turn the GDK error text, if any, into a MAL exception */
	{
		char *errbuf = GDKerrbuf;

		if (errbuf && *errbuf) {
			char *text, *colon, *msg;

			if (strncmp(errbuf, "!ERROR: ", 8) == 0)
				errbuf += 8;
			/* text with a '!' at offset 5 is passed on whole;
			 * otherwise only the part after the first ": " (if
			 * any) is reported */
			text = errbuf;
			if (strchr(errbuf, '!') != errbuf + 5) {
				colon = strchr(errbuf, ':');
				if (colon != NULL && colon[1] == ' ')
					text = colon + 2;
			}
			msg = createException(MAL, malfunc, "%s", text);
			*GDKerrbuf = 0;
			return msg;
		}
	}
	throw(MAL, malfunc, OPERATION_FAILED);
}
Esempio n. 8
0
/*
 * grouped aggregates
 *
 * Common driver working on already fetched BATs: b holds the values,
 * g the group id per value and e the groups.  grpfunc is the aggregate
 * to apply; it is always called without candidate list and with its
 * last argument fixed to 1.
 *
 * The references to b, g and e are released by this function on the
 * paths below.  Returns MAL_SUCCEED with the result id in *retval, or
 * a MAL exception carrying malfunc as the function name.
 */
static str
AGGRgrouped(bat *retval, BAT *b, BAT *g, BAT *e, int tp,
			BAT *(*grpfunc)(BAT *, BAT *, BAT *, BAT *, int, int, int),
			int skip_nils,
			const char *malfunc)
{
	BAT *bn, *t, *map;

	/* all three inputs are required; release whichever we did get */
	if (b == NULL || g == NULL || e == NULL) {
		if (b)
			BBPreleaseref(b->batCacheid);
		if (g)
			BBPreleaseref(g->batCacheid);
		if (e)
			BBPreleaseref(e->batCacheid);
		throw(MAL, malfunc, RUNTIME_OBJECT_MISSING);
	}
	/* for the median the result type defaults to the input tail type */
	if (tp == TYPE_any && grpfunc == BATgroupmedian)
		tp = b->ttype;
	if (!BAThdense(b) || !BAThdense(g)) {
		/* if b or g don't have a dense head, replace the head with a
		 * dense sequence */
		t = BATjoin(BATmirror(b), g, MIN(BATcount(b), BATcount(g)));
		BBPreleaseref(b->batCacheid);
		BBPreleaseref(g->batCacheid);
		b = BATmirror(BATmark(t, 0));
		g = BATmirror(BATmark(BATmirror(t), 0));
		BBPreleaseref(t->batCacheid);
	}
	if (b->hseqbase != g->hseqbase || BATcount(b) != BATcount(g)) {
		/* b and g are not aligned: align them by creating a view on
		 * one or the other */
		oid min;				/* lowest common oid */
		oid max;				/* highest common oid */
		min = b->hseqbase;
		if (min < g->hseqbase)
			min = g->hseqbase;
		max = b->hseqbase + BATcount(b);
		if (g->hseqbase + BATcount(g) < max)
			max = g->hseqbase + BATcount(g);
		/* [min,max) is now the oid range covered by both b and g */
		if (b->hseqbase != min || b->hseqbase + BATcount(b) != max) {
			/* no common range: slice out an empty view; note that
			 * min and max stay changed for the test on g below */
			if (min >= max)
				min = max = b->hseqbase;
			t = BATslice(b, BUNfirst(b) + (BUN) (min - b->hseqbase),
						 BUNfirst(b) + (BUN) (max - b->hseqbase));
			BBPreleaseref(b->batCacheid);
			b = t;
		}
		if (g->hseqbase != min || g->hseqbase + BATcount(g) != max) {
			if (min >= max)
				min = max = g->hseqbase;
			t = BATslice(g, BUNfirst(g) + (BUN) (min - g->hseqbase),
						 BUNfirst(g) + (BUN) (max - g->hseqbase));
			BBPreleaseref(g->batCacheid);
			g = t;
		}
	}
	if (!BAThdense(e)) {
		/* if e doesn't have a dense head, renumber the group ids with
		 * a dense sequence at the cost of some left joins */
		map = BATmark(e, 0);	/* [gid,newgid(dense)] */
		BBPreleaseref(e->batCacheid);
		e = BATmirror(map);		/* [newgid(dense),gid] */
		t = BATleftjoin(g, map, BATcount(g)); /* [oid,newgid] */
		BBPreleaseref(g->batCacheid);
		g = t;
	} else {
		/* map == NULL records that no renumbering took place */
		map = NULL;
	}
	bn = (*grpfunc)(b, g, e, NULL, tp, skip_nils, 1);
	if (bn != NULL && (grpfunc == BATgroupmin || grpfunc == BATgroupmax)) {
		/* for min/max the aggregate result is joined with b;
		 * NOTE(review): presumably the aggregate yields oids into b
		 * that are turned into values here -- confirm */
		BAT *bnn = BATouterjoin(bn, b, BATcount(bn));
		BBPreleaseref(bn->batCacheid);
		bn = bnn;
	}
	/* the inputs are no longer needed, whatever the outcome */
	BBPreleaseref(b->batCacheid);
	BBPreleaseref(g->batCacheid);
	if (map == NULL)			/* if map!=NULL, e is mirror of map */
		BBPreleaseref(e->batCacheid);
	if (bn == NULL) {
		char *errbuf = GDKerrbuf;
		char *s;

		if (map)
			BBPreleaseref(map->batCacheid);

		if (errbuf && *errbuf) {
			/* turn the GDK error text into a MAL exception: skip
			 * the "!ERROR: " prefix; text that carries a '!' at
			 * offset 5 is passed on whole, otherwise only the part
			 * after the first ": " (if any) is used */
			if (strncmp(errbuf, "!ERROR: ", 8) == 0)
				errbuf += 8;
			if (strchr(errbuf, '!') == errbuf + 5) {
				s = createException(MAL, malfunc, "%s", errbuf);
			} else if ((s = strchr(errbuf, ':')) != NULL && s[1] == ' ') {
				s = createException(MAL, malfunc, "%s", s + 2);
			} else {
				s = createException(MAL, malfunc, "%s", errbuf);
			}
			/* clear the GDK error buffer now that it was reported */
			*GDKerrbuf = 0;
			return s;
		}
		throw(MAL, malfunc, OPERATION_FAILED);
	}
	if (map) {
		/* translate the dense group numbering back to the original
		 * group ids */
		t = BATleftjoin(map, bn, BATcount(bn));
		BBPreleaseref(map->batCacheid);
		BBPreleaseref(bn->batCacheid);
		bn = t;
	}
	/* hand the result over to the caller */
	*retval = bn->batCacheid;
	BBPkeepref(bn->batCacheid);
	return MAL_SUCCEED;
}
Esempio n. 9
0
/*
 * Describe BAT b as a list of (property name, property value) pairs.
 *
 * Two new transient void/str BATs with head sequence base 0 are
 * created and returned through *ret1 (names) and *ret2 (values); both
 * always receive the same number of entries.  Numeric properties are
 * rendered as strings.
 *
 * Returns GDK_FAIL when either BAT cannot be created, GDK_SUCCEED
 * otherwise.
 */
static gdk_return
CMDinfo(BAT **ret1, BAT **ret2, BAT *b)
{
	BAT *bk, *bv;
	const char *mode, *accessmode;

/* append one name to bk and then the matching value to bv */
#define CMDINFO_PAIR(k_, v_)				\
	do {						\
		BUNappend(bk, (k_), FALSE);		\
		BUNappend(bv, (v_), FALSE);		\
	} while (0)

	bk = BATnew(TYPE_void, TYPE_str, 128, TRANSIENT);
	if (bk == NULL)
		return GDK_FAIL;
	bv = BATnew(TYPE_void, TYPE_str, 128, TRANSIENT);
	if (bv == NULL) {
		BBPreclaim(bk);
		return GDK_FAIL;
	}
	BATseqbase(bk,0);
	BATseqbase(bv,0);
	*ret1 = bk;
	*ret2 = bv;

	/* textual form of the persistence mode */
	mode = b->batPersistence == PERSISTENT ? "persistent" :
	       b->batPersistence == TRANSIENT ? "transient" : "unknown";

	/* textual form of the access restriction */
	if (b->batRestricted == BAT_READ)
		accessmode = "read-only";
	else if (b->batRestricted == BAT_WRITE)
		accessmode = "updatable";
	else if (b->batRestricted == BAT_APPEND)
		accessmode = "append-only";
	else
		accessmode = "unknown";

	/* general descriptor information */
	CMDINFO_PAIR("batId", BATgetId(b));
	CMDINFO_PAIR("batCacheid", local_itoa((ssize_t)(b->batCacheid)));
	CMDINFO_PAIR("hparentid", local_itoa((ssize_t)(b->H->heap.parentid)));
	CMDINFO_PAIR("tparentid", local_itoa((ssize_t)(b->T->heap.parentid)));
	CMDINFO_PAIR("batSharecnt", local_itoa((ssize_t)(b->batSharecnt)));
	CMDINFO_PAIR("batCount", local_utoa((size_t)b->batCount));
	CMDINFO_PAIR("batCapacity", local_utoa((size_t)b->batCapacity));
	CMDINFO_PAIR("head", ATOMname(b->htype));
	CMDINFO_PAIR("tail", ATOMname(b->ttype));
	CMDINFO_PAIR("batPersistence", mode);
	CMDINFO_PAIR("batRestricted", accessmode);
	CMDINFO_PAIR("batRefcnt", local_itoa((ssize_t)(BBP_refs(b->batCacheid))));
	CMDINFO_PAIR("batLRefcnt", local_itoa((ssize_t)(BBP_lrefs(b->batCacheid))));
	CMDINFO_PAIR("batDirty", BATdirty(b) ? "dirty" : "clean");

	/* head column properties */
	CMDINFO_PAIR("hsorted", local_itoa((ssize_t)BAThordered(b)));
	CMDINFO_PAIR("hrevsorted", local_itoa((ssize_t)BAThrevordered(b)));
	CMDINFO_PAIR("hident", b->hident);
	CMDINFO_PAIR("hdense", local_itoa((ssize_t)(BAThdense(b))));
	CMDINFO_PAIR("hseqbase", oidtostr(b->hseqbase));
	CMDINFO_PAIR("hkey", local_itoa((ssize_t)(b->hkey)));
	CMDINFO_PAIR("hvarsized", local_itoa((ssize_t)(b->hvarsized)));
	CMDINFO_PAIR("halign", local_utoa(b->halign));
	CMDINFO_PAIR("hnosorted", local_utoa(b->H->nosorted));
	CMDINFO_PAIR("hnorevsorted", local_utoa(b->H->norevsorted));
	CMDINFO_PAIR("hnodense", local_utoa(b->H->nodense));
	CMDINFO_PAIR("hnokey[0]", local_utoa(b->H->nokey[0]));
	CMDINFO_PAIR("hnokey[1]", local_utoa(b->H->nokey[1]));
	CMDINFO_PAIR("hnonil", local_utoa(b->H->nonil));
	CMDINFO_PAIR("hnil", local_utoa(b->H->nil));

	/* tail column properties */
	CMDINFO_PAIR("tident", b->tident);
	CMDINFO_PAIR("tdense", local_itoa((ssize_t)(BATtdense(b))));
	CMDINFO_PAIR("tseqbase", oidtostr(b->tseqbase));
	CMDINFO_PAIR("tsorted", local_itoa((ssize_t)BATtordered(b)));
	CMDINFO_PAIR("trevsorted", local_itoa((ssize_t)BATtrevordered(b)));
	CMDINFO_PAIR("tkey", local_itoa((ssize_t)(b->tkey)));
	CMDINFO_PAIR("tvarsized", local_itoa((ssize_t)(b->tvarsized)));
	CMDINFO_PAIR("talign", local_utoa(b->talign));
	CMDINFO_PAIR("tnosorted", local_utoa(b->T->nosorted));
	CMDINFO_PAIR("tnorevsorted", local_utoa(b->T->norevsorted));
	CMDINFO_PAIR("tnodense", local_utoa(b->T->nodense));
	CMDINFO_PAIR("tnokey[0]", local_utoa(b->T->nokey[0]));
	CMDINFO_PAIR("tnokey[1]", local_utoa(b->T->nokey[1]));
	CMDINFO_PAIR("tnonil", local_utoa(b->T->nonil));
	CMDINFO_PAIR("tnil", local_utoa(b->T->nil));

	/* bookkeeping counters and stamps */
	CMDINFO_PAIR("batInserted", local_utoa(b->batInserted));
	CMDINFO_PAIR("batDeleted", local_utoa(b->batDeleted));
	CMDINFO_PAIR("batFirst", local_utoa(b->batFirst));
	CMDINFO_PAIR("htop", local_utoa(b->H->heap.free));
	CMDINFO_PAIR("ttop", local_utoa(b->T->heap.free));
	CMDINFO_PAIR("batStamp", local_itoa((ssize_t)(b->batStamp)));
	CMDINFO_PAIR("lastUsed", local_itoa((ssize_t)(BBP_lastused(b->batCacheid))));
	CMDINFO_PAIR("curStamp", local_itoa((ssize_t)(BBPcurstamp())));
	CMDINFO_PAIR("batCopiedtodisk", local_itoa((ssize_t)(b->batCopiedtodisk)));
	CMDINFO_PAIR("batDirtydesc", b->batDirtydesc ? "dirty" : "clean");

	/* heap information: fixed-size heaps first, then the optional
	 * variable-sized heaps */
	CMDINFO_PAIR("H->heap.dirty", b->H->heap.dirty ? "dirty" : "clean");
	CMDINFO_PAIR("T->heap.dirty", b->T->heap.dirty ? "dirty" : "clean");
	infoHeap(bk, bv, &b->H->heap, "head.");
	infoHeap(bk, bv, &b->T->heap, "tail.");

	CMDINFO_PAIR("H->vheap->dirty", (b->H->vheap && b->H->vheap->dirty) ? "dirty" : "clean");
	infoHeap(bk, bv, b->H->vheap, "hheap.");

	CMDINFO_PAIR("T->vheap->dirty", (b->T->vheap && b->T->vheap->dirty) ? "dirty" : "clean");
	infoHeap(bk, bv, b->T->vheap, "theap.");

	/* dump index information */
	if (b->H->hash)
		HASHinfo(bk, bv, b->H->hash, "hhash->");
	if (b->T->hash)
		HASHinfo(bk, bv, b->T->hash, "thash->");

#undef CMDINFO_PAIR

	assert(BATcount(bk) == BATcount(bv));
	return GDK_SUCCEED;
}