Ejemplo n.º 1
0
/*
 * @-
 * The operator below is only working for a very limited
 * case. It also re-uses oids, which may become a semantic
 * problem quickly.
 */
str DCdeleteUpperSlice(int *ret, int *bid, int *pos)
{
	BAT *b;
	int *readerT, *writerT;
	BUN size, i;

	(void) ret;

	if ((b = BATdescriptor(*bid)) == NULL)
		throw(MAL, "dc.deleteUpperSlice", "Cannot access input BAT");
	/* check for a failure */
	assert(b != NULL);

	/* remove Hashes etc */
	if (b->H->hash)
		HASHremove(b);
	if (b->T->hash)
		HASHremove(BATmirror(b));

	size = BATcount(b);
	writerT = (int *) Tloc(b, BUNfirst(b));
	readerT = (int *) Tloc(b, BUNfirst(b)) + *pos;

	for (i = *pos; i < size; i++)
		*writerT++ = *readerT++;
	b->batInserted -= *pos;

	BATsetcount(b, (BUN) (writerT - (int *) Tloc(b, BUNfirst(b))));
	BBPunfix(*bid);
	b->batDirty = TRUE;
	return MAL_SUCCEED;
}
Ejemplo n.º 2
0
static BAT*
MATsortloop_( bte *map_res, BAT *i1, bte *map_i1, BUN cnt_i1, BAT *i2, bte map_i2, BUN cnt_i2) 
{
	int c;
	BUN val_i1 = BUNfirst(i1);
	BUN val_i2 = BUNfirst(i2);
	BUN end_i1 = val_i1 + cnt_i1;
	BUN end_i2 = val_i2 + cnt_i2;
	BATiter bi_i1 = bat_iterator(i1); 
	BATiter bi_i2 = bat_iterator(i2);
	int (*cmp) (const void *, const void *) = BATatoms[i1->ttype].atomCmp;
	BAT *res = BATnew(TYPE_void, i1->ttype, cnt_i1 + cnt_i2, TRANSIENT);

	if (res == NULL)
		return NULL;
	BATseqbase(res, 0);
	if (map_i1 == NULL) {
		/* map_i1 = 0 */
		while ( val_i1 < end_i1 && val_i2 < end_i2) {
			if ((c = cmp(BUNtail(bi_i1,val_i1),BUNtail(bi_i2,val_i2))) <= 0) {
				BUNappend(res, BUNtail(bi_i1,val_i1), FALSE);
				*map_res++ = 0;
				val_i1++;
			} else if (c > 0) {
				BUNappend(res, BUNtail(bi_i2,val_i2), FALSE);
				*map_res++ = map_i2;
				val_i2++;
			}
		}
		while ( val_i1 < end_i1 ) {
			BUNappend(res, BUNtail(bi_i1,val_i1), FALSE);
			*map_res++ = 0;
			val_i1++;
		}
	} else {
		while ( val_i1 < end_i1 && val_i2 < end_i2) {
			if ((c = cmp(BUNtail(bi_i1,val_i1),BUNtail(bi_i2,val_i2))) <= 0) {
				BUNappend(res, BUNtail(bi_i1,val_i1), FALSE);
				*map_res++ = *map_i1++;
				val_i1++;
			} else if (c > 0) {
				BUNappend(res, BUNtail(bi_i2,val_i2), FALSE);
				*map_res++ = map_i2;
				val_i2++;
			}
		}
		while ( val_i1 < end_i1 ) {
			BUNappend(res, BUNtail(bi_i1,val_i1), FALSE);
			*map_res++ = *map_i1++;
			val_i1++;
		}
	}
	while ( val_i2 < end_i2 ) {
		BUNappend(res, BUNtail(bi_i2,val_i2), FALSE);
		*map_res++ = map_i2;
		val_i2++;
	}
	return res;
}
Ejemplo n.º 3
0
str DCselectInsert(int *ret, int *res, int *bid, lng *low, lng *hgh)
{
	BAT *b, *r;

	lng *readerH, *writerH;
	lng *readerT, *writerT;

	BUN size, i;

	(void) ret;

	if ((b = BATdescriptor(*bid)) == NULL)
		throw(MAL, "dc.selectInsert", "Cannot access input BAT");

	if ((r = BATdescriptor(*res)) == NULL)
		throw(MAL, "dc.selectInsert", "Cannot access result BAT");

	size = BATcount(b);

	if (size > BATcapacity(r) - BATcount(r)) {
		BUN ncap;
		BUN grows;
		BUN needed = size - (BATcapacity(r) - BATcount(r));
		ncap = BATcapacity(r) + needed;
		grows = BATgrows(r);
		if (ncap > grows)
			grows = ncap;
		if (BATextend(r, grows) == NULL)
			throw(MAL, "dc.selectInsert", "Failed to make room for the new values");
	}
/*printf("in dc.selectInsert size is "OIDFMT,size);*/
	writerH = (lng *) Hloc(r, BUNfirst(r));
	writerT = (lng *) Tloc(r, BUNfirst(r));

	readerH = (lng *) Hloc(b, BUNfirst(b));
	readerT = (lng *) Tloc(b, BUNfirst(b));

	for (i = 0; i < size; i++) {
		if (*readerT >= *low && *readerT <= *hgh) {
			*writerH = *readerH;
			*writerT = *readerT;

			writerH++;
			writerT++;
		}
		readerH++;
		readerT++;
	}
	BATsetcount(r, (BUN) (writerT - (lng *) Tloc(r, BUNfirst(r))));

	BBPunfix(*bid);
	BBPunfix(*res);

	return MAL_SUCCEED;
}
Ejemplo n.º 4
0
/*
 * @-
 * The operator below is only working for a very limited cases.
 */
str DCreplaceTailBasedOnHead(int *ret, int *res, int *bid)
{
	BAT *b, *r;
	oid *readerH_b;
	int *writerT_r, *readerT_b;
	BUN size_b, size_r, i;

	(void) ret;

	if ((b = BATdescriptor(*bid)) == NULL)
		throw(MAL, "dc.replaceTailBasedOnHead", "Cannot access input BAT");
	/* check for a failure */
	assert(b != NULL);

	if ((r = BATdescriptor(*res)) == NULL)
		throw(MAL, "dc.replaceTailBasedOnHead", "Cannot access result BAT");
	/* check for a failure */
	assert(r != NULL);


	/* remove Hashes etc */
	if (r->H->hash)
		HASHremove(r);
	if (r->T->hash)
		HASHremove(BATmirror(r));

	size_r = BATcount(r);
	size_b = BATcount(b);


	if ((b->htype == TYPE_void) && (size_b == size_r)) {
		writerT_r = (int *) Tloc(r, BUNfirst(r));
		readerT_b = (int *) Tloc(b, BUNfirst(b));
		for (i = 0; i < size_r; i++) {
			*writerT_r = *readerT_b;
			writerT_r++;
			readerT_b++;
		}
	} else if ((b->htype != TYPE_void) && (size_b < size_r)) {
		readerH_b = (oid *) Hloc(b, BUNfirst(b));
		readerT_b = (int *) Tloc(b, BUNfirst(b));
		for (i = 0; i < size_b; i++) {
			writerT_r = (int *) Tloc(r, BUNfirst(r)) + *readerH_b;
			*writerT_r = *readerT_b;
			readerH_b++;
			readerT_b++;
		}
	}

	BBPunfix(*bid);
	BBPunfix(*res);
	r->batDirty = TRUE;
	return MAL_SUCCEED;
}
Ejemplo n.º 5
0
/*
 * @-
 * The BUN- and BAT-stream manipulate a long handle, i.e.
 * the destination variable. It assumes it has been set to
 * zero as part of runtime stack initialization. Subsequently,
 * it fetches a bun and returns the increment to the control
 * variable. If it returns zero the control variable has been reset
 * to zero and end of stream has been reached.
 */
str
ITRbunIterator(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
{
	BATiter bi;
	BAT *b;
	oid *head;
	int *bid;
	ValPtr tail;

	(void) cntxt;
	(void) mb;
	head = (oid *) getArgReference(stk, pci, 0);
	tail = getArgReference(stk,pci,1);
	bid = (int *) getArgReference(stk, pci, 2);

	if ((b = BATdescriptor(*bid)) == NULL) {
		throw(MAL, "iterator.nextChunk", INTERNAL_BAT_ACCESS);
	}

	if (BATcount(b) == 0) {
		*head = oid_nil;
		BBPunfix(b->batCacheid);
		return MAL_SUCCEED;
	}
	*head = BUNfirst(b);

 	bi = bat_iterator(b);
	VALinit(tail, b->ttype, BUNtail(bi, *(BUN*) head));
	BBPunfix(b->batCacheid);
	return MAL_SUCCEED;
}
Ejemplo n.º 6
0
/*
 * The nextChunk version advances the reader,
 * which also means that the view descriptor is already available.
 * The granule size may differ in each call.
 */
str
ITRnextChunk(lng *res, int *vid, int *bid, lng *granule)
{
	BAT *b, *view;
	BUN i;

	if ((b = BATdescriptor(*bid)) == NULL) {
			throw(MAL, "iterator.nextChunk", INTERNAL_BAT_ACCESS);
	}
	if ((view = BATdescriptor(*vid)) == NULL) {
		BBPunfix(b->batCacheid);
		throw(MAL, "iterator.nextChunk", INTERNAL_BAT_ACCESS);
	}
	i = (BUN) (*res + BATcount(view));
	if (i >= BUNlast(b)) {
		*res = lng_nil;
		*vid = 0;
		BBPunfix(view->batCacheid);
		BBPunfix(b->batCacheid);
		return MAL_SUCCEED;
	}
	/* printf("set bat chunk bound to " BUNFMT " - " BUNFMT " \n",
	   i, i+(BUN) *granule-1); */
	VIEWbounds(b, view, i, i + (BUN) * granule);
	BATseqbase(view, b->hseqbase == oid_nil ? oid_nil : b->hseqbase + i - BUNfirst(b));
	BBPkeepref(*vid = view->batCacheid);
	BBPunfix(b->batCacheid);
	*res = i;
	return MAL_SUCCEED;
}
Ejemplo n.º 7
0
void
ALIGNsetH(BAT *b1, BAT *b2)
{
	ssize_t diff;

	if (b1 == NULL || b2 == NULL)
		return;

	diff = (ssize_t) (BUNfirst(b1) - BUNfirst(b2));
	if (b2->halign == 0) {
		b2->halign = OIDnew(1);
		b2->batDirtydesc = TRUE;
	}
	if (BAThvoid(b2)) {
		/* b2 is either dense or has a void(nil) head */
		if (b1->htype != TYPE_void)
			b1->hdense = TRUE;
		else if (b2->hseqbase == oid_nil)
			b1->H->nonil = FALSE;
		BATseqbase(b1, b2->hseqbase);
	} else if (b1->htype != TYPE_void) {
		/* b2 is not dense, so set b1 not dense */
		b1->hdense = FALSE;
		BATseqbase(b1, oid_nil);
		b1->H->nonil = b2->H->nonil;
	} else if (BAThkey(b2))
		BATseqbase(b1, 0);
	BATkey(b1, BAThkey(b2));
	b1->hsorted = BAThordered(b2);
	b1->hrevsorted = BAThrevordered(b2);
	b1->halign = b2->halign;
	b1->batDirtydesc = TRUE;
	b1->H->norevsorted = (BUN) (b2->H->norevsorted + diff);
	b1->H->nokey[0] = (BUN) (b2->H->nokey[0] + diff);
	b1->H->nokey[1] = (BUN) (b2->H->nokey[1] + diff);
	b1->H->nosorted = (BUN) (b2->H->nosorted + diff);
	b1->H->nodense = (BUN) (b2->H->nodense + diff);
}
Ejemplo n.º 8
0
/*
 * We start with the large chunk iterator.
 * The definition of the control statements require the same
 * control variables, which means that the BATview is accessible
 * to determine how far to advance when the next chunk is retrieved.
 * The number of elements in the chunk is limited by the granule
 * size.
 */
str
ITRnewChunk(lng *res, int *vid, int *bid, lng *granule)
{
	BAT *b, *view;
	BUN cnt, first;

	if ((b = BATdescriptor(*bid)) == NULL) {
		throw(MAL, "chop.newChunk", INTERNAL_BAT_ACCESS);
	}
	cnt = BATcount(b);
	first = BUNfirst(b);
	view = VIEWcreate_(b, b, TRUE);

	/*  printf("set bat chunk bound to " LLFMT " " BUNFMT " - " BUNFMT "\n",
	 *granule, first, MIN(cnt,(BUN) *granule)); */
	VIEWbounds(b, view, (BUN) first, first + MIN(cnt, (BUN) * granule));
	BATseqbase(view, b->hseqbase);
	*vid = view->batCacheid;
	BBPkeepref(view->batCacheid);
	BBPunfix(b->batCacheid);
	*res = first;
	return MAL_SUCCEED;
}
Ejemplo n.º 9
0
Archivo: json.c Proyecto: f7753/monetdb
static str
JSONrenderRowObject(BAT **bl, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci, BUN idx)
{
	int i, tpe;
	char *row, *name = 0, *val = 0;
	size_t len, lim, l;
	void *p;
	BATiter bi;

	row = (char *) GDKmalloc(lim = BUFSIZ);
	row[0] = '{';
	row[1] = 0;
	len = 1;
	val = (char *) GDKmalloc(BUFSIZ);
	for (i = pci->retc; i < pci->argc; i += 2) {
		name = stk->stk[getArg(pci, i)].val.sval;
		bi = bat_iterator(bl[i + 1]);
		p = BUNtail(bi, BUNfirst(bl[i + 1]) + idx);
		tpe = getColumnType(getArgType(mb, pci, i + 1));
		ATOMformat(tpe, p, &val);
		if (strncmp(val, "nil", 3) == 0)
			strcpy(val, "null");
		l = strlen(name) + strlen(val);
		while (l > lim - len)
			row = (char *) GDKrealloc(row, lim += BUFSIZ);
		snprintf(row + len, lim - len, "\"%s\":%s,", name, val);
		len += l + 4;
	}
	if (row[1])
		row[len - 1] = '}';
	else {
		row[1] = '}';
		row[2] = 0;
	}
	GDKfree(val);
	return row;
}
Ejemplo n.º 10
0
/*
 * The remainder are utilities to manipulate the BAT view and not to
 * forget some details in the process.  It expects a position range in
 * the underlying BAT and compensates for outliers.
 */
void
VIEWbounds(BAT *b, BAT *view, BUN l, BUN h)
{
	BUN cnt;
	BATiter bi = bat_iterator(b);

	if (b == NULL || view == NULL) {
		GDKerror("VIEWbounds: bat argument missing");
		return;
	}
	if (h > BATcount(b))
		h = BATcount(b);
	if (h < l)
		h = l;
	l += BUNfirst(b);
	view->batFirst = view->batDeleted = view->batInserted = 0;
	cnt = h - l;
	view->H->heap.base = (view->htype) ? BUNhloc(bi, l) : NULL;
	view->T->heap.base = (view->ttype) ? BUNtloc(bi, l) : NULL;
	view->H->heap.size = headsize(view, cnt);
	view->T->heap.size = tailsize(view, cnt);
	BATsetcount(view, cnt);
	BATsetcapacity(view, cnt);
}
Ejemplo n.º 11
0
Archivo: json.c Proyecto: f7753/monetdb
static str
JSONrenderRowArray(BAT **bl, MalBlkPtr mb, InstrPtr pci, BUN idx)
{
	int i, tpe;
	char *row, *val = 0;
	size_t len, lim, l;
	void *p;
	BATiter bi;

	row = (char *) GDKmalloc(lim = BUFSIZ);
	row[0] = '[';
	row[1] = 0;
	len = 1;
	val = (char *) GDKmalloc(BUFSIZ);
	for (i = pci->retc; i < pci->argc; i++) {
		bi = bat_iterator(bl[i]);
		p = BUNtail(bi, BUNfirst(bl[i]) + idx);
		tpe = getColumnType(getArgType(mb, pci, i));
		ATOMformat(tpe, p, &val);
		if (strncmp(val, "nil", 3) == 0)
			strcpy(val, "null");
		l = strlen(val);
		while (l > lim - len)
			row = (char *) GDKrealloc(row, lim += BUFSIZ);
		snprintf(row + len, lim - len, "%s,", val);
		len += l + 1;
	}
	if (row[1])
		row[len - 1] = ']';
	else {
		row[1] = '}';
		row[2] = 0;
	}
	GDKfree(val);
	return row;
}
Ejemplo n.º 12
0
gdk_return
BATmaterializeh(BAT *b)
{
	int ht;
	BUN cnt;
	Heap head;
	BUN p, q;
	oid h, *x;
	bte tshift;

	BATcheck(b, "BATmaterialize", GDK_FAIL);
	assert(!isVIEW(b));
	ht = b->htype;
	cnt = BATcapacity(b);
	head = b->H->heap;
	p = BUNfirst(b);
	q = BUNlast(b);
	assert(cnt >= q - p);
	ALGODEBUG fprintf(stderr, "#BATmaterialize(%d);\n", (int) b->batCacheid);

	if (!BAThdense(b) || ht != TYPE_void) {
		/* no voids */
		return GDK_SUCCEED;
	}
	ht = TYPE_oid;

	/* cleanup possible ACC's */
	HASHdestroy(b);
	IMPSdestroy(b);

	b->H->heap.filename = NULL;
	if (HEAPalloc(&b->H->heap, cnt, sizeof(oid)) != GDK_SUCCEED) {
		b->H->heap = head;
		return GDK_FAIL;
	}

	/* point of no return */
	b->htype = ht;
	tshift = b->T->shift;
	BATsetdims(b);
	if (b->ttype) {
		b->T->shift = tshift;	/* restore in case it got changed */
		b->T->width = 1 << tshift;
	}
	b->batDirty = TRUE;
	b->batDirtydesc = TRUE;
	b->H->heap.dirty = TRUE;

	/* set the correct dense info */
	b->hdense = TRUE;

	/* So now generate [h..h+cnt-1] */
	h = b->hseqbase;
	x = (oid *) b->H->heap.base;
	for (; p < q; p++)
		*x++ = h++;
	cnt = h - b->hseqbase;
	BATsetcount(b, cnt);

	/* cleanup the old heaps */
	HEAPfree(&head, 0);
	return GDK_SUCCEED;
}
Ejemplo n.º 13
0
/*
 * grouped aggregates
 */
static str
AGGRgrouped(bat *retval, BAT *b, BAT *g, BAT *e, int tp,
			BAT *(*grpfunc)(BAT *, BAT *, BAT *, BAT *, int, int, int),
			int skip_nils,
			const char *malfunc)
{
	BAT *bn, *t, *map;

	if (b == NULL || g == NULL || e == NULL) {
		if (b)
			BBPreleaseref(b->batCacheid);
		if (g)
			BBPreleaseref(g->batCacheid);
		if (e)
			BBPreleaseref(e->batCacheid);
		throw(MAL, malfunc, RUNTIME_OBJECT_MISSING);
	}
	if (tp == TYPE_any && grpfunc == BATgroupmedian)
		tp = b->ttype;
	if (!BAThdense(b) || !BAThdense(g)) {
		/* if b or g don't have a dense head, replace the head with a
		 * dense sequence */
		t = BATjoin(BATmirror(b), g, MIN(BATcount(b), BATcount(g)));
		BBPreleaseref(b->batCacheid);
		BBPreleaseref(g->batCacheid);
		b = BATmirror(BATmark(t, 0));
		g = BATmirror(BATmark(BATmirror(t), 0));
		BBPreleaseref(t->batCacheid);
	}
	if (b->hseqbase != g->hseqbase || BATcount(b) != BATcount(g)) {
		/* b and g are not aligned: align them by creating a view on
		 * one or the other */
		oid min;				/* lowest common oid */
		oid max;				/* highest common oid */
		min = b->hseqbase;
		if (min < g->hseqbase)
			min = g->hseqbase;
		max = b->hseqbase + BATcount(b);
		if (g->hseqbase + BATcount(g) < max)
			max = g->hseqbase + BATcount(g);
		if (b->hseqbase != min || b->hseqbase + BATcount(b) != max) {
			if (min >= max)
				min = max = b->hseqbase;
			t = BATslice(b, BUNfirst(b) + (BUN) (min - b->hseqbase),
						 BUNfirst(b) + (BUN) (max - b->hseqbase));
			BBPreleaseref(b->batCacheid);
			b = t;
		}
		if (g->hseqbase != min || g->hseqbase + BATcount(g) != max) {
			if (min >= max)
				min = max = g->hseqbase;
			t = BATslice(g, BUNfirst(g) + (BUN) (min - g->hseqbase),
						 BUNfirst(g) + (BUN) (max - g->hseqbase));
			BBPreleaseref(g->batCacheid);
			g = t;
		}
	}
	if (!BAThdense(e)) {
		/* if e doesn't have a dense head, renumber the group ids with
		 * a dense sequence at the cost of some left joins */
		map = BATmark(e, 0);	/* [gid,newgid(dense)] */
		BBPreleaseref(e->batCacheid);
		e = BATmirror(map);		/* [newgid(dense),gid] */
		t = BATleftjoin(g, map, BATcount(g)); /* [oid,newgid] */
		BBPreleaseref(g->batCacheid);
		g = t;
	} else {
		map = NULL;
	}
	bn = (*grpfunc)(b, g, e, NULL, tp, skip_nils, 1);
	if (bn != NULL && (grpfunc == BATgroupmin || grpfunc == BATgroupmax)) {
		BAT *bnn = BATouterjoin(bn, b, BATcount(bn));
		BBPreleaseref(bn->batCacheid);
		bn = bnn;
	}
	BBPreleaseref(b->batCacheid);
	BBPreleaseref(g->batCacheid);
	if (map == NULL)			/* if map!=NULL, e is mirror of map */
		BBPreleaseref(e->batCacheid);
	if (bn == NULL) {
		char *errbuf = GDKerrbuf;
		char *s;

		if (map)
			BBPreleaseref(map->batCacheid);

		if (errbuf && *errbuf) {
			if (strncmp(errbuf, "!ERROR: ", 8) == 0)
				errbuf += 8;
			if (strchr(errbuf, '!') == errbuf + 5) {
				s = createException(MAL, malfunc, "%s", errbuf);
			} else if ((s = strchr(errbuf, ':')) != NULL && s[1] == ' ') {
				s = createException(MAL, malfunc, "%s", s + 2);
			} else {
				s = createException(MAL, malfunc, "%s", errbuf);
			}
			*GDKerrbuf = 0;
			return s;
		}
		throw(MAL, malfunc, OPERATION_FAILED);
	}
	if (map) {
		t = BATleftjoin(map, bn, BATcount(bn));
		BBPreleaseref(map->batCacheid);
		BBPreleaseref(bn->batCacheid);
		bn = t;
	}
	*retval = bn->batCacheid;
	BBPkeepref(bn->batCacheid);
	return MAL_SUCCEED;
}
Ejemplo n.º 14
0
/* BATsample implements sampling for void headed BATs */
BAT *
BATsample(BAT *b, BUN n)
{
	BAT *bn;
	BUN cnt, slen;
	BUN rescnt;
	struct oidtreenode *tree = NULL;

	BATcheck(b, "BATsample", NULL);
	assert(BAThdense(b));
	ERRORcheck(n > BUN_MAX, "BATsample: sample size larger than BUN_MAX\n", NULL);
	ALGODEBUG
		fprintf(stderr, "#BATsample: sample " BUNFMT " elements.\n", n);

	cnt = BATcount(b);
	/* empty sample size */
	if (n == 0) {
		bn = BATnew(TYPE_void, TYPE_void, 0, TRANSIENT);
		if (bn == NULL) {
			GDKerror("BATsample: memory allocation error");
			return NULL;
		}
		BATsetcount(bn, 0);
		BATseqbase(bn, 0);
		BATseqbase(BATmirror(bn), 0);
	/* sample size is larger than the input BAT, return all oids */
	} else if (cnt <= n) {
		bn = BATnew(TYPE_void, TYPE_void, cnt, TRANSIENT);
		if (bn == NULL) {
			GDKerror("BATsample: memory allocation error");
			return NULL;
		}
		BATsetcount(bn, cnt);
		BATseqbase(bn, 0);
		BATseqbase(BATmirror(bn), b->H->seq);
	} else {
		oid minoid = b->hseqbase;
		oid maxoid = b->hseqbase + cnt;
		/* if someone samples more than half of our tree, we
		 * do the antiset */
		bit antiset = n > cnt / 2;
		slen = n;
		if (antiset)
			n = cnt - n;

		tree = GDKmalloc(n * sizeof(struct oidtreenode));
		if (tree == NULL) {
			GDKerror("#BATsample: memory allocation error");
			return NULL;
		}
		bn = BATnew(TYPE_void, TYPE_oid, slen, TRANSIENT);
		if (bn == NULL) {
			GDKfree(tree);
			GDKerror("#BATsample: memory allocation error");
			return NULL;
		}
		/* while we do not have enough sample OIDs yet */
		for (rescnt = 0; rescnt < n; rescnt++) {
			oid candoid;
			do {
				/* generate a new random OID */
				candoid = (oid) (minoid + DRAND * (maxoid - minoid));
				/* if that candidate OID was already
				 * generated, try again */
			} while (!OIDTreeMaybeInsert(tree, candoid, rescnt));
		}
		if (!antiset) {
			OIDTreeToBAT(tree, bn);
		} else {
			OIDTreeToBATAntiset(tree, bn, minoid, maxoid);
		}
		GDKfree(tree);

		BATsetcount(bn, slen);
		bn->trevsorted = bn->batCount <= 1;
		bn->tsorted = 1;
		bn->tkey = 1;
		bn->tdense = bn->batCount <= 1;
		if (bn->batCount == 1)
			bn->tseqbase = *(oid *) Tloc(bn, BUNfirst(bn));
		bn->hdense = 1;
		bn->hseqbase = 0;
		bn->hkey = 1;
		bn->hrevsorted = bn->batCount <= 1;
		bn->hsorted = 1;
	}
	return bn;
}
Ejemplo n.º 15
0
/*
 * The prime routine for the BAT layer is to create a new hash index.
 * Its argument is the element type and the maximum number of BUNs be
 * stored under the hash function.
 */
gdk_return
BAThash(BAT *b, BUN masksize)
{
	BAT *o = NULL;
	lng t0 = 0, t1 = 0;

	if (BATcheckhash(b)) {
		if (o != NULL) {
			o->T->hash = b->T->hash;
			BBPunfix(b->batCacheid);
		}
		return GDK_SUCCEED;
	}
	MT_lock_set(&GDKhashLock(abs(b->batCacheid)), "BAThash");
	if (b->T->hash == NULL) {
		unsigned int tpe = ATOMbasetype(b->ttype);
		BUN cnt = BATcount(b);
		BUN mask, maxmask = 0;
		BUN p = BUNfirst(b), q = BUNlast(b), r;
		Hash *h = NULL;
		Heap *hp;
		const char *nme = BBP_physical(b->batCacheid);
		const char *ext = b->batCacheid > 0 ? "thash" : "hhash";
		BATiter bi = bat_iterator(b);
#ifdef PERSISTENTHASH
		int fd;
#endif

		ALGODEBUG fprintf(stderr, "#BAThash: create hash(" BUNFMT ");\n", BATcount(b));
		if ((hp = GDKzalloc(sizeof(*hp))) == NULL ||
		    (hp->farmid = BBPselectfarm(b->batRole, b->ttype, hashheap)) < 0 ||
		    (hp->filename = GDKmalloc(strlen(nme) + 12)) == NULL) {
			MT_lock_unset(&GDKhashLock(abs(b->batCacheid)), "BAThash");
			GDKfree(hp);
			return GDK_FAIL;
		}
		sprintf(hp->filename, "%s.%s", nme, ext);

		/* cnt = 0, hopefully there is a proper capacity from
		 * which we can derive enough information */
		if (!cnt)
			cnt = BATcapacity(b);

		if (b->ttype == TYPE_void) {
			if (b->tseqbase == oid_nil) {
				MT_lock_unset(&GDKhashLock(abs(b->batCacheid)), "BAThash");
				ALGODEBUG fprintf(stderr, "#BAThash: cannot create hash-table on void-NIL column.\n");
				GDKfree(hp->filename);
				GDKfree(hp);
				return GDK_FAIL;
			}
			ALGODEBUG fprintf(stderr, "#BAThash: creating hash-table on void column..\n");

			tpe = TYPE_void;
		}
		/* determine hash mask size p = first; then no dynamic
		 * scheme */
		if (masksize > 0) {
			mask = HASHmask(masksize);
		} else if (ATOMsize(tpe) == 1) {
			mask = (1 << 8);
		} else if (ATOMsize(tpe) == 2) {
			mask = (1 << 16);
		} else if (b->tkey) {
			mask = HASHmask(cnt);
		} else {
			/* dynamic hash: we start with
			 * HASHmask(cnt)/64; if there are too many
			 * collisions we try HASHmask(cnt)/16, then
			 * HASHmask(cnt)/4, and finally
			 * HASHmask(cnt).  */
			maxmask = HASHmask(cnt);
			mask = maxmask >> 6;
			p += (cnt >> 2);	/* try out on first 25% of b */
			if (p > q)
				p = q;
		}

		t0 = GDKusec();

		do {
			BUN nslots = mask >> 3;	/* 1/8 full is too full */

			r = BUNfirst(b);
			if (h) {
				char *fnme;
				bte farmid;

				ALGODEBUG fprintf(stderr, "#BAThash: retry hash construction\n");
				fnme = GDKstrdup(hp->filename);
				farmid = hp->farmid;
				HEAPfree(hp, 1);
				memset(hp, 0, sizeof(*hp));
				hp->filename = fnme;
				hp->farmid = farmid;
				GDKfree(h);
				h = NULL;
			}
			/* create the hash structures */
			if ((h = HASHnew(hp, ATOMtype(b->ttype), BATcapacity(b), mask, BATcount(b))) == NULL) {

				MT_lock_unset(&GDKhashLock(abs(b->batCacheid)), "BAThash");
				GDKfree(hp->filename);
				GDKfree(hp);
				return GDK_FAIL;
			}

			switch (tpe) {
			case TYPE_bte:
				starthash(bte);
				break;
			case TYPE_sht:
				starthash(sht);
				break;
			case TYPE_int:
			case TYPE_flt:
#if SIZEOF_OID == SIZEOF_INT
			case TYPE_oid:
#endif
#if SIZEOF_WRD == SIZEOF_INT
			case TYPE_wrd:
#endif
				starthash(int);
				break;
			case TYPE_dbl:
			case TYPE_lng:
#if SIZEOF_OID == SIZEOF_LNG
			case TYPE_oid:
#endif
#if SIZEOF_WRD == SIZEOF_LNG
			case TYPE_wrd:
#endif
				starthash(lng);
				break;
#ifdef HAVE_HGE
			case TYPE_hge:
				starthash(hge);
				break;
#endif
			default:
				for (; r < p; r++) {
					ptr v = BUNtail(bi, r);
					BUN c = (BUN) heap_hash_any(b->T->vheap, h, v);

					if (HASHget(h, c) == HASHnil(h) &&
					    nslots-- == 0)
						break;	/* mask too full */
					HASHputlink(h, r, HASHget(h, c));
					HASHput(h, c, r);
				}
				break;
			}
		} while (r < p && mask < maxmask && (mask <<= 2));

		/* finish the hashtable with the current mask */
		p = r;
		switch (tpe) {
		case TYPE_bte:
			finishhash(bte);
			break;
		case TYPE_sht:
			finishhash(sht);
			break;
		case TYPE_int:
		case TYPE_flt:
#if SIZEOF_OID == SIZEOF_INT
		case TYPE_oid:
#endif
#if SIZEOF_WRD == SIZEOF_INT
		case TYPE_wrd:
#endif
			finishhash(int);
			break;
		case TYPE_dbl:
		case TYPE_lng:
#if SIZEOF_OID == SIZEOF_LNG
		case TYPE_oid:
#endif
#if SIZEOF_WRD == SIZEOF_LNG
		case TYPE_wrd:
#endif
			finishhash(lng);
			break;
#ifdef HAVE_HGE
		case TYPE_hge:
			finishhash(hge);
			break;
#endif
		default:
			for (; p < q; p++) {
				ptr v = BUNtail(bi, p);
				BUN c = (BUN) heap_hash_any(b->T->vheap, h, v);

				HASHputlink(h, p, HASHget(h, c));
				HASHput(h, c, p);
			}
			break;
		}
#ifdef PERSISTENTHASH
		if ((BBP_status(b->batCacheid) & BBPEXISTING) &&
		    b->batInserted == b->batCount &&
		    HEAPsave(hp, nme, ext) == GDK_SUCCEED &&
		    (fd = GDKfdlocate(hp->farmid, nme, "rb+", ext)) >= 0) {
			ALGODEBUG fprintf(stderr, "#BAThash: persisting hash %d\n", b->batCacheid);
			((size_t *) hp->base)[0] |= 1 << 24;
			if (write(fd, hp->base, SIZEOF_SIZE_T) < 0)
				perror("write hash");
			if (!(GDKdebug & FORCEMITOMASK)) {
#if defined(NATIVE_WIN32)
				_commit(fd);
#elif defined(HAVE_FDATASYNC)
				fdatasync(fd);
#elif defined(HAVE_FSYNC)
				fsync(fd);
#endif
			}
			close(fd);
		} else
			ALGODEBUG fprintf(stderr, "#BAThash: NOT persisting hash %d\n", b->batCacheid);
#endif
		b->T->hash = h;
		t1 = GDKusec();
		ALGODEBUG fprintf(stderr, "#BAThash: hash construction " LLFMT " usec\n", t1 - t0);
		ALGODEBUG HASHcollisions(b, b->T->hash);
	}
Ejemplo n.º 16
0
str FITSloadTable(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
{
	mvc *m = NULL;
	sql_schema *sch;
	sql_table *fits_fl, *fits_tbl, *tbl = NULL;
	sql_column *col;
	sql_subtype tpe;
	fitsfile *fptr;
	str tname = *(str*)getArgReference(stk, pci, 1);
	str fname;
	str msg = MAL_SUCCEED;
	oid rid = oid_nil, frid = oid_nil;
	int status = 0, cnum = 0, fid, hdu, hdutype, i, j, anynull = 0, mtype;
	int *tpcode = NULL;
	long *rep = NULL, *wid = NULL, rows;
	char keywrd[80], **cname, nm[FLEN_VALUE];
	ptr nilptr;

	msg = getSQLContext(cntxt, mb, &m, NULL);
	if (msg)
		return msg;
	sch = mvc_bind_schema(m, "sys");

	fits_tbl = mvc_bind_table(m, sch, "fits_tables");
	if (fits_tbl == NULL) {
		msg = createException(MAL, "fits.loadtable", "FITS catalog is missing.\n");
		return msg;
	}

	tbl = mvc_bind_table(m, sch, tname);
	if (tbl) {
		msg = createException(MAL, "fits.loadtable", "Table %s is already created.\n", tname);
		return msg;
	}

	col = mvc_bind_column(m, fits_tbl, "name");
	rid = table_funcs.column_find_row(m->session->tr, col, tname, NULL);
	if (rid == oid_nil) {
		msg = createException(MAL, "fits.loadtable", "Table %s is unknown in FITS catalog. Attach first the containing file\n", tname);
		return msg;
	}

	/* Open FITS file and move to the table HDU */
	col = mvc_bind_column(m, fits_tbl, "file_id");
	fid = *(int*)table_funcs.column_find_value(m->session->tr, col, rid);

	fits_fl = mvc_bind_table(m, sch, "fits_files");
	col = mvc_bind_column(m, fits_fl, "id");
	frid = table_funcs.column_find_row(m->session->tr, col, (void *)&fid, NULL);
	col = mvc_bind_column(m, fits_fl, "name");
	fname = (char *)table_funcs.column_find_value(m->session->tr, col, frid);
	if (fits_open_file(&fptr, fname, READONLY, &status)) {
		msg = createException(MAL, "fits.loadtable", "Missing FITS file %s.\n", fname);
		return msg;
	}

	col = mvc_bind_column(m, fits_tbl, "hdu");
	hdu = *(int*)table_funcs.column_find_value(m->session->tr, col, rid);
	fits_movabs_hdu(fptr, hdu, &hdutype, &status);
	if (hdutype != ASCII_TBL && hdutype != BINARY_TBL) {
		msg = createException(MAL, "fits.loadtable", "HDU %d is not a table.\n", hdu);
		fits_close_file(fptr, &status);
		return msg;
	}

	/* create a SQL table to hold the FITS table */
	/*	col = mvc_bind_column(m, fits_tbl, "columns");
	   cnum = *(int*) table_funcs.column_find_value(m->session->tr, col, rid); */
	fits_get_num_cols(fptr, &cnum, &status);
	tbl = mvc_create_table(m, sch, tname, tt_table, 0, SQL_PERSIST, 0, cnum);

	tpcode = (int *)GDKzalloc(sizeof(int) * cnum);
	rep = (long *)GDKzalloc(sizeof(long) * cnum);
	wid = (long *)GDKzalloc(sizeof(long) * cnum);
	cname = (char **)GDKzalloc(sizeof(char *) * cnum);

	for (j = 1; j <= cnum; j++) {
		/*		fits_get_acolparms(fptr, j, cname, &tbcol, tunit, tform, &tscal, &tzero, tnull, tdisp, &status); */
		snprintf(keywrd, 80, "TTYPE%d", j);
		fits_read_key(fptr, TSTRING, keywrd, nm, NULL, &status);
		if (status) {
			snprintf(nm, FLEN_VALUE, "column_%d", j);
			status = 0;
		}
		cname[j - 1] = GDKstrdup(toLower(nm));
		fits_get_coltype(fptr, j, &tpcode[j - 1], &rep[j - 1], &wid[j - 1], &status);
		fits2subtype(&tpe, tpcode[j - 1], rep[j - 1], wid[j - 1]);

		/*		mnstr_printf(cntxt->fdout,"#%d %ld %ld - M: %s\n", tpcode[j-1], rep[j-1], wid[j-1], tpe.type->sqlname); */

		mvc_create_column(m, tbl, cname[j - 1], &tpe);
	}

	/* data load */
	fits_get_num_rows(fptr, &rows, &status);
	mnstr_printf(cntxt->fdout,"#Loading %ld rows in table %s\n", rows, tname);
	for (j = 1; j <= cnum; j++) {
		BAT *tmp = NULL;
		int time0 = GDKms();
		mtype = fits2mtype(tpcode[j - 1]);
		nilptr = ATOMnil(mtype);
		col = mvc_bind_column(m, tbl, cname[j - 1]);

		tmp = BATnew(TYPE_void, mtype, rows);
		if ( tmp == NULL){
			GDKfree(tpcode);
			GDKfree(rep);
			GDKfree(wid);
			GDKfree(cname);
			throw(MAL,"fits.load", MAL_MALLOC_FAIL);
		}
		BATseqbase(tmp, 0);
		if (rows > (long)REMAP_PAGE_MAXSIZE)
			BATmmap(tmp, STORE_MMAP, STORE_MMAP, STORE_MMAP, STORE_MMAP, 0);
		if (mtype != TYPE_str) {
			fits_read_col(fptr, tpcode[j - 1], j, 1, 1, rows, nilptr, (void *)BUNtloc(bat_iterator(tmp), BUNfirst(tmp)), &anynull, &status);
			BATsetcount(tmp, rows);
			tmp->tsorted = 0;
			tmp->trevsorted = 0;
		} else {
/*			char *v = GDKzalloc(wid[j-1]);*/
			int bsize = 50;
			int tm0, tloadtm = 0, tattachtm = 0;
			int batch = bsize, k;
			char **v = (char **) GDKzalloc(sizeof(char *) * bsize);
			for(i = 0; i < bsize; i++)
				v[i] = GDKzalloc(wid[j-1]);
			for(i = 0; i < rows; i += batch) {
				batch = rows - i < bsize ? rows - i: bsize;
				tm0 = GDKms();
				fits_read_col(fptr, tpcode[j - 1], j, 1 + i, 1, batch, nilptr, (void *)v, &anynull, &status);
				tloadtm += GDKms() - tm0;
				tm0 = GDKms();
				for(k = 0; k < batch ; k++)
					BUNappend(tmp, v[k], TRUE);
				tattachtm += GDKms() - tm0;
			}
			for(i = 0; i < bsize ; i++)
				GDKfree(v[i]);
			GDKfree(v);
			mnstr_printf(cntxt->fdout,"#String column load %d ms, BUNappend %d ms\n", tloadtm, tattachtm);
		}

		if (status) {
			char buf[FLEN_ERRMSG + 1];
			fits_read_errmsg(buf);
			msg = createException(MAL, "fits.loadtable", "Cannot load column %s of %s table: %s.\n", cname[j - 1], tname, buf);
			break;
		}
		mnstr_printf(cntxt->fdout,"#Column %s loaded for %d ms\t", cname[j-1], GDKms() - time0);
		store_funcs.append_col(m->session->tr, col, tmp, TYPE_bat);
		mnstr_printf(cntxt->fdout,"#Total %d ms\n", GDKms() - time0);
		BBPunfix(tmp->batCacheid);
	}

	GDKfree(tpcode);
	GDKfree(rep);
	GDKfree(wid);
	GDKfree(cname);

	fits_close_file(fptr, &status);
	return msg;
}
Ejemplo n.º 17
0
static str
AGGRsubgroupedExt(bat *retval1, bat *retval2, bat *bid, bat *gid, bat *eid, bat *sid,
			   int skip_nils, int abort_on_error, int tp,
			   BAT *(*grpfunc1)(BAT *, BAT *, BAT *, BAT *, int, int, int),
			   gdk_return (*grpfunc2)(BAT **, BAT **, BAT *, BAT *, BAT *, BAT *, int, int, int),
			   BAT *(*quantilefunc)(BAT *, BAT *, BAT *, BAT *, int, double, int, int),
			   bat *quantile,
			   const char *malfunc)
{
	BAT *b, *g, *e, *s, *bn = NULL, *cnts, *q = NULL;
	double qvalue;

   /* one of grpfunc1, grpfunc2 and quantilefunc is non-NULL and the others are */
	assert((grpfunc1 && grpfunc2 == NULL && quantilefunc == NULL) ||
			(grpfunc1 == NULL && grpfunc2 && quantilefunc == NULL) ||
			(grpfunc1 == NULL && grpfunc2 == NULL && quantilefunc) );

	/* if retval2 is non-NULL, we must have grpfunc2 */
	assert(retval2 == NULL || grpfunc2 != NULL);

	b = BATdescriptor(*bid);
	g = gid ? BATdescriptor(*gid) : NULL;
	e = eid ? BATdescriptor(*eid) : NULL;
	q = quantile ? BATdescriptor(*quantile) : NULL;

	if (b == NULL || (gid != NULL && g == NULL) || (eid != NULL && e == NULL)) {
		if (b)
			BBPreleaseref(b->batCacheid);
		if (g)
			BBPreleaseref(g->batCacheid);
		if (e)
			BBPreleaseref(e->batCacheid);
		throw(MAL, malfunc, RUNTIME_OBJECT_MISSING);
	}
	if (tp == TYPE_any && (grpfunc1 == BATgroupmedian || quantilefunc == BATgroupquantile))
		tp = b->ttype;

	if (sid) {
		s = BATdescriptor(*sid);
		if (s == NULL) {
			BBPreleaseref(b->batCacheid);
			if (g)
				BBPreleaseref(g->batCacheid);
			if (e)
				BBPreleaseref(e->batCacheid);
			throw(MAL, malfunc, RUNTIME_OBJECT_MISSING);
		}
	} else {
		if (!BAThdense(b)) {
			/* XXX backward compatibility code: ignore non-dense head, but
			 * only if no candidate list */
			s = BATmirror(BATmark(BATmirror(b), 0));
			BBPreleaseref(b->batCacheid);
			b = s;
		}
		s = NULL;
	}
	if (grpfunc1)
		bn = (*grpfunc1)(b, g, e, s, tp, skip_nils, abort_on_error);
	if (quantilefunc) {
		assert(BATcount(q)>0);
		assert(q->ttype == TYPE_dbl);
		qvalue = ((const double *)Tloc(q, BUNfirst(q)))[0];
		if (qvalue <  0|| qvalue > 1) {
			char *s;
			s = createException(MAL, malfunc, "quantile value of %f is not in range [0,1]", qvalue);
			return s;
		}
		bn = (*quantilefunc)(b, g, e, s, tp, qvalue, skip_nils, abort_on_error);
	}
	if (grpfunc2 && (*grpfunc2)(&bn, retval2 ? &cnts : NULL, b, g, e, s, tp, skip_nils, abort_on_error) == GDK_FAIL)
		bn = NULL;

	BBPreleaseref(b->batCacheid);
	if (g)
		BBPreleaseref(g->batCacheid);
	if (e)
		BBPreleaseref(e->batCacheid);
	if (s)
		BBPreleaseref(s->batCacheid);
	if (bn == NULL) {
		char *errbuf = GDKerrbuf;
		char *s;

		if (errbuf && *errbuf) {
			if (strncmp(errbuf, "!ERROR: ", 8) == 0)
				errbuf += 8;
			if (strchr(errbuf, '!') == errbuf + 5) {
				s = createException(MAL, malfunc, "%s", errbuf);
			} else if ((s = strchr(errbuf, ':')) != NULL && s[1] == ' ') {
				s = createException(MAL, malfunc, "%s", s + 2);
			} else {
				s = createException(MAL, malfunc, "%s", errbuf);
			}
			*GDKerrbuf = 0;
			return s;
		}
		throw(MAL, malfunc, OPERATION_FAILED);
	}
	*retval1 = bn->batCacheid;
	BBPkeepref(bn->batCacheid);
	if (retval2) {
		*retval2 = cnts->batCacheid;
		BBPkeepref(cnts->batCacheid);
	}
	return MAL_SUCCEED;
}
Ejemplo n.º 18
0
str
BKCreuseBATmap(bat *ret, const bat *bid, const bat *did)
{
	BAT *b, *d, *bn, *bs;
	oid bidx, oidx = 0, *o, *ol;
	oid *r;
	gdk_return res;

	if ((b = BATdescriptor(*bid)) == NULL) {
		throw(MAL, "bat.shrinkMap", RUNTIME_OBJECT_MISSING);
	}
	if ( b->htype != TYPE_void) {
		BBPunfix(b->batCacheid);
		throw(MAL, "bat.shrinkMap", SEMANTIC_TYPE_MISMATCH);
	}
	if ((d = BATdescriptor(*did)) == NULL) {
		BBPunfix(b->batCacheid);
		throw(MAL, "bat.shrinkMap", RUNTIME_OBJECT_MISSING);
	}
	bn= BATnew(TYPE_void, TYPE_oid, BATcount(b) - BATcount(d), TRANSIENT);
	if (bn == NULL) {
		BBPunfix(b->batCacheid);
		BBPunfix(d->batCacheid);
		throw(MAL, "bat.shrinkMap", MAL_MALLOC_FAIL );
	}
	res = BATsubsort(&bs, NULL, NULL, d, NULL, NULL, 0, 0);
	BBPunfix(d->batCacheid);
	if (res != GDK_SUCCEED) {
		BBPunfix(b->batCacheid);
		BBPunfix(bn->batCacheid);
		throw(MAL, "bat.shrinkMap", MAL_MALLOC_FAIL );
	}

	oidx = b->hseqbase;
	bidx = oidx + BATcount(b)-1;
	o  = (oid*)Tloc(bs, BUNfirst(bs));
	ol = (oid*)Tloc(bs, BUNlast(bs));
	r  = (oid*)Tloc(bn, BUNfirst(bn));

	for (; oidx <= bidx; oidx++) {
		if ( *o == oidx ){
			while ( ol > o && ol[-1] == bidx) {
				bidx--;
				ol--;
			}
			*r++ = bidx;
			o += (o < ol);
			bidx--;
		} else {
			*r++ = oidx;
		}
	}

    BATsetcount(bn, BATcount(b)-BATcount(bs));
	BATseqbase(bn, b->hseqbase);
    bn->tsorted = 0;
    bn->trevsorted = 0;
    bn->tdense = 0;

    if (!(bn->batDirty&2)) BATsetaccess(bn, BAT_READ);

	BBPunfix(b->batCacheid);
	BBPunfix(bs->batCacheid);
	BBPkeepref(*ret= bn->batCacheid);
	return MAL_SUCCEED;
}
Ejemplo n.º 19
0
/*
 * The prime routine for the BAT layer is to create a new hash index.
 * Its argument is the element type and the maximum number of BUNs be
 * stored under the hash function.
 */
BAT *
BAThash(BAT *b, BUN masksize)
{
	BAT *o = NULL;
	lng t0,t1;
	(void) t0; 
	(void) t1;

	if (VIEWhparent(b)) {
		bat p = VIEWhparent(b);
		o = b;
		b = BATdescriptor(p);
		if (!ALIGNsynced(o, b) || BUNfirst(o) != BUNfirst(b)) {
			BBPunfix(b->batCacheid);
			b = o;
			o = NULL;
		}
	}
	MT_lock_set(&GDKhashLock(ABS(b->batCacheid)), "BAThash");
	if (b->H->hash == NULL) {
		unsigned int tpe = ATOMstorage(b->htype);
		BUN cnt = BATcount(b);
		BUN mask;
		BUN p = BUNfirst(b), q = BUNlast(b), r;
		Hash *h = NULL;
		Heap *hp = NULL;
		str nme = BBP_physical(b->batCacheid);
		BATiter bi = bat_iterator(b);

		ALGODEBUG fprintf(stderr, "#BAThash: create hash(" BUNFMT ");\n", BATcount(b));
		/* cnt = 0, hopefully there is a proper capacity from
		 * which we can derive enough information */
		if (!cnt)
			cnt = BATcapacity(b);

		if (b->htype == TYPE_void) {
			if (b->hseqbase == oid_nil) {
				MT_lock_unset(&GDKhashLock(ABS(b->batCacheid)), "BAThash");
				ALGODEBUG fprintf(stderr, "#BAThash: cannot create hash-table on void-NIL column.\n");
				return NULL;
			}
			ALGODEBUG fprintf(stderr, "#BAThash: creating hash-table on void column..\n");

			tpe = TYPE_void;
		}
		/* determine hash mask size p = first; then no dynamic
		 * scheme */
		if (masksize > 0) {
			mask = HASHmask(masksize);
		} else if (ATOMsize(ATOMstorage(tpe)) == 1) {
			mask = (1 << 8);
		} else if (ATOMsize(ATOMstorage(tpe)) == 2) {
			mask = (1 << 12);
		} else if (b->hkey) {
			mask = HASHmask(cnt);
		} else {
			/* dynamic hash: we start with
			 * HASHmask(cnt/64); if there are too many
			 * collisions we try HASHmask(cnt/16), then
			 * HASHmask(cnt/4), and finally
			 * HASHmask(cnt).  */
			mask = HASHmask(cnt >> 6);
			p += (cnt >> 2);	/* try out on first 25% of b */
			if (p > q)
				p = q;
		}

		if (mask < 1024)
			mask = 1024;
		t0 = GDKusec();
		do {
			BUN nslots = mask >> 3;	/* 1/8 full is too full */

			r = BUNfirst(b);
			if (hp) {
				HEAPfree(hp);
				GDKfree(hp);
			}
			if (h) {
				ALGODEBUG fprintf(stderr, "#BAThash: retry hash construction\n");
				GDKfree(h);
			}
			/* create the hash structures */
			hp = (Heap *) GDKzalloc(sizeof(Heap));
			if (hp &&
			    (hp->filename = GDKmalloc(strlen(nme) + 12)) != NULL)
				sprintf(hp->filename, "%s.%chash", nme, b->batCacheid > 0 ? 'h' : 't');
			if (hp == NULL ||
			    hp->filename == NULL ||
			    (h = HASHnew(hp, ATOMtype(b->htype), BATcapacity(b), mask)) == NULL) {

				MT_lock_unset(&GDKhashLock(ABS(b->batCacheid)), "BAThash");
				if (hp != NULL) {
					GDKfree(hp->filename);
					GDKfree(hp);
				}
				return NULL;
			}

			switch (tpe) {
			case TYPE_bte:
				starthash(bte);
				break;
			case TYPE_sht:
				starthash(sht);
				break;
			case TYPE_int:
			case TYPE_flt:
				starthash(int);
				break;
			case TYPE_dbl:
			case TYPE_lng:
				starthash(lng);
				break;
			default:
				for (; r < p; r++) {
					ptr v = BUNhead(bi, r);
					BUN c = (BUN) heap_hash_any(b->H->vheap, h, v);

					if ( HASHget(h,c) == HASHnil(h) &&
					    nslots-- == 0)
						break;	/* mask too full */
					HASHputlink(h,r, HASHget(h,c));
					HASHput(h,c, r);
				}
				break;
			}
		} while (r < p && mask < cnt && (mask <<= 2));

		/* finish the hashtable with the current mask */
		p = r;
		switch (tpe) {
		case TYPE_bte:
			finishhash(bte);
			break;
		case TYPE_sht:
			finishhash(sht);
			break;
		case TYPE_int:
		case TYPE_flt:
			finishhash(int);
			break;
		case TYPE_dbl:
		case TYPE_lng:
			finishhash(lng);
			break;
		default:
			for (; p < q; p++) {
				ptr v = BUNhead(bi, p);
				BUN c = (BUN) heap_hash_any(b->H->vheap, h, v);

				HASHputlink(h,p, HASHget(h,c));
				HASHput(h,c,p);
			}
			break;
		}
		b->H->hash = h;
		t1 = GDKusec();
		ALGODEBUG 
				fprintf(stderr, "#BAThash: hash construction "LLFMT" usec\n", t1-t0);
		ALGODEBUG HASHcollisions(b,b->H->hash);
	}
Ejemplo n.º 20
0
// merging multiple OID lists, optimized for empty bats
// Further improvement should come from multi-bat merging.
str
MATmergepack(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p)
{
	int i,j= 0, *ret = (int*) getArgReference(stk,p,0);
	int top=0;
	oid  **o_end, **o_src, *o, *oo, onxt;
	BAT *b, *bn, *bm, **bats;
	BUN cap = 0;

	(void)cntxt;
	(void)mb;
	bats = (BAT**) GDKzalloc(sizeof(BAT*) * p->argc);
	o_end = (oid**) GDKzalloc(sizeof(oid*) * p->argc);
	o_src = (oid**) GDKzalloc(sizeof(oid*) * p->argc);

	if ( bats ==0 || o_end == 0 || o_src == 0){
		if (bats) GDKfree(bats);
		if (o_src) GDKfree(o_src);
		if (o_end) GDKfree(o_end);
		throw(MAL,"mat.mergepack",MAL_MALLOC_FAIL);
	}
	for (i = 1; i < p->argc; i++) {
		int bid = stk->stk[getArg(p,i)].val.ival;
		b = BATdescriptor(abs(bid));
		if (b ){
			cap += BATcount(b);
			if ( BATcount(b) ){
				// pre-sort the arguments
				onxt = *(oid*) Tloc(b,BUNfirst(b));
				for( j =top; j > 0 && onxt < *o_src[j-1]; j--){
					o_src[j] = o_src[j-1];
					o_end[j] = o_end[j-1];
					bats[j] = bats[j-1];
				}
				o_src[j] = (oid*) Tloc(b,BUNfirst(b));
				o_end[j] = o_src[j] + BATcount(b);
				bats[j] = b;
				top++;
			}
		}
	}

	bn = BATnew(TYPE_void, TYPE_oid, cap, TRANSIENT);
	if (bn == NULL){
		GDKfree(bats);
		GDKfree(o_src);
		GDKfree(o_end);
		throw(MAL, "mat.pack", MAL_MALLOC_FAIL);
	}

	if ( cap == 0){
		BATseqbase(bn, 0);
		BATseqbase(BATmirror(bn), 0);
		BBPkeepref(*ret = bn->batCacheid);
		GDKfree(bats);
		GDKfree(o_src);
		GDKfree(o_end);
		return MAL_SUCCEED;
	}
	BATseqbase(bn, bats[0]->hseqbase);
	// UNROLL THE MULTI-BAT MERGE
	o = (oid*) Tloc(bn,BUNfirst(bn));
	while( top){
		*o++ = *o_src[0];
		o_src[0]++;
		if( o_src[0] == o_end[0]){
			// remove this one
			for(j=0; j< top; j++){
				o_src[j]= o_src[j+1];
				o_end[j]= o_end[j+1];
				bats[j] = bats[j+1];
			}
			top--;
		} else{
			// resort priority queue
			onxt= *o_src[0];
			for( j=1; j< top && onxt > *o_src[j]; j++){
				oo = o_src[j]; o_src[j]= o_src[j-1]; o_src[j-1]= oo;
				oo = o_end[j]; o_end[j]= o_end[j-1]; o_end[j-1]= oo;
				bm = bats[j]; bats[j]=bats[j-1]; bats[j-1] = bm;
			}
		}
	}
	for( i=0; i< top; i++)
		BBPunfix(bats[i]->batCacheid);
    BATsetcount(bn, (BUN) (o - (oid *) Tloc(bn, BUNfirst(bn))));
    BATseqbase(bn, 0);
	BATsettrivprop(bn);
	GDKfree(bats);
	GDKfree(o_src);
	GDKfree(o_end);
    /* properties */
    bn->trevsorted = 0;
    bn->tsorted = 1;
    bn->tkey = 1;
    bn->T->nil = 0;
    bn->T->nonil = 1;
	BBPkeepref(*ret = bn->batCacheid);
	return MAL_SUCCEED;
}
Ejemplo n.º 21
0
str
BATXMLforest(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
{
	bat *ret = getArgReference_bat(stk, pci, 0);
	BAT *bn;
	BATiter *bi;
	BUN *p, *q;
	str buf;
	int i;
	size_t offset, len, size = BUFSIZ;
	const char *err = OPERATION_FAILED;

	(void) mb;
	(void) cntxt;
	buf = GDKmalloc(size);
	bi = GDKmalloc(sizeof(BATiter) * pci->argc);
	p = GDKmalloc(sizeof(BUN) * pci->argc);
	q = GDKmalloc(sizeof(BUN) * pci->argc);
	if (buf == NULL || bi == NULL || p == NULL || q == NULL) {
		if (buf)
			GDKfree(buf);
		if (bi)
			GDKfree(bi);
		if (p)
			GDKfree(p);
		if (q)
			GDKfree(q);
		throw(MAL, "xml.forest", MAL_MALLOC_FAIL);
	}

	/* collect the admin for the xml elements */
	for (i = pci->retc; i < pci->argc; i++) {
		if ((bi[i].b = BATdescriptor(*getArgReference_bat(stk, pci, i))) == NULL)
			break;
		p[i] = BUNfirst(bi[i].b);
		q[i] = BUNlast(bi[i].b);
	}
	/* check for errors */
	if (i != pci->argc) {
		for (i--; i >= pci->retc; i--)
			if (bi[i].b)
				BBPunfix(bi[i].b->batCacheid);
		GDKfree(bi);
		GDKfree(p);
		GDKfree(q);
		GDKfree(buf);
		throw(MAL, "xml.forest", INTERNAL_BAT_ACCESS);
	}

	prepareResult(bn, bi[pci->retc].b, TYPE_xml, "forest",
				  for (i = pci->retc; i < pci->argc; i++) BBPunfix(bi[i].b->batCacheid);
				  GDKfree(bi); GDKfree(p); GDKfree(q); GDKfree(buf));

	while (p[pci->retc] < q[pci->retc]) {
		const char *t;

		/* fetch the elements */
		offset = 0;
		strcpy(buf, str_nil);
		for (i = pci->retc; i < pci->argc; i++) {
			int n;

			t = (const char *) BUNtail(bi[i], p[i]);
			if (strNil(t))
				continue;

			if ((len = strlen(t)) >= size - offset) {
				size += len + 128;
				buf = GDKrealloc(buf, size);
				if (buf == NULL) {
					err = MAL_MALLOC_FAIL;
					goto bunins_failed;
				}
			}
			if (offset == 0)
				n = snprintf(buf, size, "%s", t);
			else if (buf[0] != *t) {
				err = "incompatible values in forest";
				goto bunins_failed;
			} else if (buf[0] == 'A')
				n = snprintf(buf + offset, size - offset, " %s", t + 1);
			else if (buf[0] == 'C')
				n = snprintf(buf + offset, size - offset, "%s", t + 1);
			else {
				err = "can only combine attributes and element content";
				goto bunins_failed;
			}
			offset += n;
		}
		bunfastapp(bn, buf);
		if (offset == 0)
			bn->T->nonil = 0;

		for (i = pci->retc; i < pci->argc; i++)
			if (bi[i].b)
				p[i]++;
	}
	GDKfree(buf);
	finalizeResult(ret, bn, bi[pci->retc].b);
	GDKfree(bi);
	GDKfree(p);
	GDKfree(q);
	return MAL_SUCCEED;
bunins_failed:
	for (i = pci->retc; i < pci->argc; i++)
		if (bi[i].b)
			BBPunfix(bi[i].b->batCacheid);
	BBPunfix(bn->batCacheid);
	if (buf != NULL)
		GDKfree(buf);
	GDKfree(bi);
	GDKfree(p);
	GDKfree(q);
	throw(MAL, "xml.forest", "%s", err);
}
Ejemplo n.º 22
0
str
BATXMLconcat(bat *ret, const bat *bid, const bat *rid)
{
	BAT *b, *r = 0, *bn;
	BUN p, q, rp = 0;
	size_t len, size = BUFSIZ;
	str buf = GDKmalloc(size);
	BATiter bi, ri;
	const char *err = OPERATION_FAILED;

	if (buf == NULL)
		throw(MAL, "xml.concat", MAL_MALLOC_FAIL);
	b = BATdescriptor(*bid);
	r = BATdescriptor(*rid);
	if (b == NULL || r == NULL) {
		GDKfree(buf);
		if (b)
			BBPunfix(b->batCacheid);
		if (r)
			BBPunfix(r->batCacheid);
		throw(MAL, "xml.concat", INTERNAL_BAT_ACCESS);
	}
	p = BUNfirst(b);
	q = BUNlast(b);
	rp = BUNfirst(r);

	prepareResult(bn, b, TYPE_xml, "concat",
				  GDKfree(buf); BBPunfix(r->batCacheid));

	bi = bat_iterator(b);
	ri = bat_iterator(r);
	while (p < q) {
		const char *t = (const char *) BUNtail(bi, p);
		const char *v = (const char *) BUNtail(ri, rp);

		len = strlen(t) + strlen(v) + 1;

		if (len >= size) {
			GDKfree(buf);
			size = len + 128;
			buf = GDKmalloc(size);
			if (buf == NULL) {
				err= MAL_MALLOC_FAIL;
				goto bunins_failed;
			}
		}
		if (strNil(t)) {
			if (strNil(v)) {
				strcpy(buf, str_nil);
				bn->T->nonil = 0;
			} else
				strcpy(buf, v);
		} else {
			if (strNil(v))
			   strcpy(buf, t);
			else if (*t != *v) {
				err = "arguments not compatible";
				goto bunins_failed;
			} else if (*t == 'A')
				snprintf(buf, size, "A%s %s", t + 1, v + 1);
			else if (*t == 'C')
				snprintf(buf, size, "C%s%s", t + 1, v + 1);
			else {
				err = "can only concatenate attributes and element content";
				goto bunins_failed;
			}
		}
		bunfastapp(bn, buf);
		rp++;
		p++;
	}
	GDKfree(buf);
	finalizeResult(ret, bn, b);
	return MAL_SUCCEED;
  bunins_failed:
	BBPunfix(r->batCacheid);
	BBPunfix(b->batCacheid);
	BBPunfix(bn->batCacheid);
	if (buf != NULL)
		GDKfree(buf);
	throw(MAL, "xml.concat", "%s", err);
}
Ejemplo n.º 23
0
Archivo: json.c Proyecto: f7753/monetdb
static str
JSONfoldKeyValue(str *ret, const bat *id, const bat *key, const bat *values)
{
	BAT *bo = 0, *bk = 0, *bv;
	BATiter boi, bki, bvi;
	int tpe;
	char *row, *val = 0, *nme = 0;
	BUN i, cnt;
	size_t len, lim, l;
	void *p;
	oid o = 0;;

	if (key) {
		bk = BATdescriptor(*key);
		if (bk == NULL) {
			*ret = GDKstrdup(str_nil);
			throw(MAL, "json.fold", RUNTIME_OBJECT_MISSING);
		}
	}

	bv = BATdescriptor(*values);
	if (bv == NULL) {
		if (bk)
			BBPunfix(bk->batCacheid);
		*ret = GDKstrdup(str_nil);
		throw(MAL, "json.fold", RUNTIME_OBJECT_MISSING);
	}
	tpe = bv->ttype;
	cnt = BATcount(bv);
	if (bk)
		bki = bat_iterator(bk);
	bvi = bat_iterator(bv);
	if (id) {
		bo = BATdescriptor(*id);
		if (bo == NULL) {
			if (bk)
				BBPunfix(bk->batCacheid);
			BBPunfix(bv->batCacheid);
			throw(MAL, "json.nest", RUNTIME_OBJECT_MISSING);
		}
	}

	row = (char *) GDKmalloc(lim = BUFSIZ);
	row[0] = '[';
	row[1] = 0;
	len = 1;
	val = (char *) GDKmalloc(BUFSIZ);
	if (id) {
		boi = bat_iterator(bo);
		o = *(oid *) BUNtail(boi, BUNfirst(bo));
	}
	if (bk)
		bki = bat_iterator(bk);
	bvi = bat_iterator(bv);

	for (i = 0; i < cnt; i++) {
		if (id &&bk) {
			p = BUNtail(boi, BUNfirst(bo) + i);
			if (*(oid *) p != o) {
				snprintf(row + len, lim - len, ", ");
				len += 2;
				o = *(oid *) p;
			}
		}

		if (bk) {
			nme = (str) BUNtail(bki, BUNfirst(bk) + i);
			l = strlen(nme);
			while (l + 3 > lim - len)
				row = (char *) GDKrealloc(row, lim = (lim / (i + 1)) * cnt + BUFSIZ + l + 3);
			if (row == NULL) {
				*ret = GDKstrdup(str_nil);
				if (bo)
					BBPunfix(bo->batCacheid);
				if (bk)
					BBPunfix(bk->batCacheid);
				BBPunfix(bv->batCacheid);
				throw(MAL, "json.fold", MAL_MALLOC_FAIL);
			}
			if (strcmp(nme, str_nil)) {
				snprintf(row + len, lim - len, "\"%s\":", nme);
				len += l + 3;
			}
		}

		bvi = bat_iterator(bv);
		p = BUNtail(bvi, BUNfirst(bv) + i);
		if (tpe == TYPE_json)
			val = p;
		else {
			ATOMformat(tpe, p, &val);
			if (strncmp(val, "nil", 3) == 0)
				strcpy(val, "null");
		}
		l = strlen(val);
		while (l > lim - len)
			row = (char *) GDKrealloc(row, lim = (lim / (i + 1)) * cnt + BUFSIZ + l + 3);

		if (row == NULL) {
			if (bo)
				BBPunfix(bo->batCacheid);
			if (bk)
				BBPunfix(bk->batCacheid);
			BBPunfix(bv->batCacheid);
			*ret = GDKstrdup(str_nil);
			throw(MAL, "json.fold", MAL_MALLOC_FAIL);
		}
		strncpy(row + len, val, l);
		len += l;
		row[len++] = ',';
		row[len] = 0;
	}
	if (row[1]) {
		row[len - 1] = ']';
		row[len] = 0;
	} else {
		row[1] = ']';
		row[2] = 0;
	}
	if (tpe != TYPE_json)
		GDKfree(val);
	if (bo)
		BBPunfix(bo->batCacheid);
	if (bk)
		BBPunfix(bk->batCacheid);
	BBPunfix(bv->batCacheid);
	*ret = row;
	return MAL_SUCCEED;
}
Ejemplo n.º 24
0
/*
 * grouped aggregates
 */
static str
AGGRgrouped(bat *retval1, bat *retval2, BAT *b, BAT *g, BAT *e, int tp,
			BAT *(*grpfunc1)(BAT *, BAT *, BAT *, BAT *, int, int, int),
			gdk_return (*grpfunc2)(BAT **, BAT **, BAT *, BAT *, BAT *, BAT *, int, int, int),
			BAT *(*quantilefunc)(BAT *, BAT *, BAT *, BAT *, int, double, int, int),
			BAT *quantile,
			int skip_nils,
			const char *malfunc)
{
	BAT *bn, *cnts = NULL, *t, *map;
	double qvalue;

   /* one of grpfunc1, grpfunc2 and quantilefunc is non-NULL and the others are */
	assert((grpfunc1 != NULL && grpfunc2 == NULL && quantilefunc == NULL) ||
			(grpfunc1 == NULL && grpfunc2 != NULL && quantilefunc == NULL) ||
			(grpfunc1 == NULL && grpfunc2 == NULL && quantilefunc != NULL) );
	/* if retval2 is non-NULL, we must have grpfunc2 */
	assert(retval2 == NULL || grpfunc2 != NULL);
	assert(quantile == NULL || quantilefunc != NULL);

	if (b == NULL || g == NULL || e == NULL) {
		if (b)
			BBPreleaseref(b->batCacheid);
		if (g)
			BBPreleaseref(g->batCacheid);
		if (e)
			BBPreleaseref(e->batCacheid);
		throw(MAL, malfunc, RUNTIME_OBJECT_MISSING);
	}
	if (tp == TYPE_any && (grpfunc1 == BATgroupmedian || quantilefunc == BATgroupquantile))
		tp = b->ttype;
	if (!BAThdense(b) || !BAThdense(g)) {
		/* if b or g don't have a dense head, replace the head with a
		 * dense sequence */
		t = BATjoin(BATmirror(b), g, MIN(BATcount(b), BATcount(g)));
		BBPreleaseref(b->batCacheid);
		BBPreleaseref(g->batCacheid);
		b = BATmirror(BATmark(t, 0));
		g = BATmirror(BATmark(BATmirror(t), 0));
		BBPreleaseref(t->batCacheid);
	}
	if (b->hseqbase != g->hseqbase || BATcount(b) != BATcount(g)) {
		/* b and g are not aligned: align them by creating a view on
		 * one or the other */
		oid min;				/* lowest common oid */
		oid max;				/* highest common oid */
		min = b->hseqbase;
		if (min < g->hseqbase)
			min = g->hseqbase;
		max = b->hseqbase + BATcount(b);
		if (g->hseqbase + BATcount(g) < max)
			max = g->hseqbase + BATcount(g);
		if (b->hseqbase != min || b->hseqbase + BATcount(b) != max) {
			if (min >= max)
				min = max = b->hseqbase;
			t = BATslice(b, BUNfirst(b) + (BUN) (min - b->hseqbase),
						 BUNfirst(b) + (BUN) (max - b->hseqbase));
			BBPreleaseref(b->batCacheid);
			b = t;
		}
		if (g->hseqbase != min || g->hseqbase + BATcount(g) != max) {
			if (min >= max)
				min = max = g->hseqbase;
			t = BATslice(g, BUNfirst(g) + (BUN) (min - g->hseqbase),
						 BUNfirst(g) + (BUN) (max - g->hseqbase));
			BBPreleaseref(g->batCacheid);
			g = t;
		}
	}
	if (!BAThdense(e)) {
		/* if e doesn't have a dense head, renumber the group ids with
		 * a dense sequence at the cost of some left joins */
		map = BATmark(e, 0);	/* [gid,newgid(dense)] */
		BBPreleaseref(e->batCacheid);
		e = BATmirror(map);		/* [newgid(dense),gid] */
		t = BATleftjoin(g, map, BATcount(g)); /* [oid,newgid] */
		BBPreleaseref(g->batCacheid);
		g = t;
	} else {
		map = NULL;
	}
	if (grpfunc1)
		bn = (*grpfunc1)(b, g, e, NULL, tp, skip_nils, 1);
	if (quantilefunc) {
		assert(BATcount(quantile)>0);
		assert(quantile->ttype == TYPE_dbl);
		qvalue = ((const double *)Tloc(quantile, BUNfirst(quantile)))[0];
		if (qvalue <  0|| qvalue > 1) {
			char *s;
			s = createException(MAL, malfunc, "quantile value of %f is not in range [0,1]", qvalue);
			return s;
		}
		bn = (*quantilefunc)(b, g, e, NULL, tp, qvalue, skip_nils, 1);
	}
	if (grpfunc2 && (*grpfunc2)(&bn, retval2 ? &cnts : NULL, b, g, e, NULL, tp, skip_nils, 1) == GDK_FAIL)
		bn = NULL;
	if (bn != NULL && (grpfunc1 == BATgroupmin || grpfunc1 == BATgroupmax)) {
		t = BATproject(bn, b);
		BBPreleaseref(bn->batCacheid);
		bn = t;
	}
	BBPreleaseref(b->batCacheid);
	BBPreleaseref(g->batCacheid);
	if (map == NULL)			/* if map!=NULL, e is mirror of map */
		BBPreleaseref(e->batCacheid);
	if (bn == NULL) {
		char *errbuf = GDKerrbuf;
		char *s;

		if (map)
			BBPreleaseref(map->batCacheid);

		if (errbuf && *errbuf) {
			if (strncmp(errbuf, "!ERROR: ", 8) == 0)
				errbuf += 8;
			if (strchr(errbuf, '!') == errbuf + 5) {
				s = createException(MAL, malfunc, "%s", errbuf);
			} else if ((s = strchr(errbuf, ':')) != NULL && s[1] == ' ') {
				s = createException(MAL, malfunc, "%s", s + 2);
			} else {
				s = createException(MAL, malfunc, "%s", errbuf);
			}
			*GDKerrbuf = 0;
			return s;
		}
		throw(MAL, malfunc, OPERATION_FAILED);
	}
	if (map) {
		t = BATleftjoin(map, bn, BATcount(bn));
		BBPreleaseref(bn->batCacheid);
		bn = t;
		if (cnts) {
			t = BATleftjoin(map, cnts, BATcount(cnts));
			BBPreleaseref(cnts->batCacheid);
			cnts = t;
		}
		BBPreleaseref(map->batCacheid);
	}
	*retval1 = bn->batCacheid;
	BBPkeepref(bn->batCacheid);
	if (retval2) {
		*retval2 = cnts->batCacheid;
		BBPkeepref(cnts->batCacheid);
	}
	return MAL_SUCCEED;
}
Ejemplo n.º 25
0
static BUN
SORTfndwhich(BAT *b, const void *v, enum find_which which)
{
	BUN lo, hi, mid;
	int cmp;
	BUN cur;
	BATiter bi;
	BUN diff, end;
	int tp;

	if (b == NULL || (!b->tsorted && !b->trevsorted))
		return BUN_NONE;

	lo = BUNfirst(b);
	hi = BUNlast(b);

	if (BATtdense(b)) {
		/* no need for binary search on dense column */
		if (*(const oid *) v < b->tseqbase ||
		    *(const oid *) v == oid_nil)
			return which == FIND_ANY ? BUN_NONE : lo;
		if (*(const oid *) v >= b->tseqbase + BATcount(b))
			return which == FIND_ANY ? BUN_NONE : hi;
		cur = (BUN) (*(const oid *) v - b->tseqbase) + lo;
		return cur + (which == FIND_LAST);
	}
	if (b->ttype == TYPE_void) {
		assert(b->tseqbase == oid_nil);
		switch (which) {
		case FIND_FIRST:
			if (*(const oid *) v == oid_nil)
				return lo;
		case FIND_LAST:
			return hi;
		default:
			if (lo < hi && *(const oid *) v == oid_nil)
				return lo;
			return BUN_NONE;
		}
	}
	cmp = 1;
	cur = BUN_NONE;
	bi = bat_iterator(b);
	/* only use storage type if comparison functions are equal */
	tp = ATOMbasetype(b->ttype);

	switch (which) {
	case FIND_FIRST:
		end = lo;
		if (lo >= hi || (b->tsorted ? atom_GE(BUNtail(bi, lo), v, b->ttype) : atom_LE(BUNtail(bi, lo), v, b->ttype))) {
			/* shortcut: if BAT is empty or first (and
			 * hence all) tail value is >= v (if sorted)
			 * or <= v (if revsorted), we're done */
			return lo;
		}
		break;
	case FIND_LAST:
		end = hi;
		if (lo >= hi || (b->tsorted ? atom_LE(BUNtail(bi, hi - 1), v, b->ttype) : atom_GE(BUNtail(bi, hi - 1), v, b->ttype))) {
			/* shortcut: if BAT is empty or first (and
			 * hence all) tail value is <= v (if sorted)
			 * or >= v (if revsorted), we're done */
			return hi;
		}
		break;
	default: /* case FIND_ANY -- stupid compiler */
		end = 0;	/* not used in this case */
		if (lo >= hi) {
			/* empty BAT: value not found */
			return BUN_NONE;
		}
		break;
	}

	if (b->tsorted) {
		switch (tp) {
		case TYPE_bte:
			SORTfndloop(bte, simple_CMP, BUNtloc);
			break;
		case TYPE_sht:
			SORTfndloop(sht, simple_CMP, BUNtloc);
			break;
		case TYPE_int:
			SORTfndloop(int, simple_CMP, BUNtloc);
			break;
		case TYPE_lng:
			SORTfndloop(lng, simple_CMP, BUNtloc);
			break;
#ifdef HAVE_HGE
		case TYPE_hge:
			SORTfndloop(hge, simple_CMP, BUNtloc);
			break;
#endif
		case TYPE_flt:
			SORTfndloop(flt, simple_CMP, BUNtloc);
			break;
		case TYPE_dbl:
			SORTfndloop(dbl, simple_CMP, BUNtloc);
			break;
		default:
			if (b->tvarsized)
				SORTfndloop(b->ttype, atom_CMP, BUNtvar);
			else
				SORTfndloop(b->ttype, atom_CMP, BUNtloc);
			break;
		}
	} else {
		switch (tp) {
		case TYPE_bte:
			SORTfndloop(bte, -simple_CMP, BUNtloc);
			break;
		case TYPE_sht:
			SORTfndloop(sht, -simple_CMP, BUNtloc);
			break;
		case TYPE_int:
			SORTfndloop(int, -simple_CMP, BUNtloc);
			break;
		case TYPE_lng:
			SORTfndloop(lng, -simple_CMP, BUNtloc);
			break;
#ifdef HAVE_HGE
		case TYPE_hge:
			SORTfndloop(hge, -simple_CMP, BUNtloc);
			break;
#endif
		case TYPE_flt:
			SORTfndloop(flt, -simple_CMP, BUNtloc);
			break;
		case TYPE_dbl:
			SORTfndloop(dbl, -simple_CMP, BUNtloc);
			break;
		default:
			if (b->tvarsized)
				SORTfndloop(b->ttype, -atom_CMP, BUNtvar);
			else
				SORTfndloop(b->ttype, -atom_CMP, BUNtloc);
			break;
		}
	}

	switch (which) {
	case FIND_FIRST:
		if (cmp == 0 && b->tkey == 0) {
			/* shift over multiple equals */
			for (diff = cur - end; diff; diff >>= 1) {
				while (cur >= end + diff &&
				       atom_EQ(BUNtail(bi, cur - diff), v, b->ttype))
					cur -= diff;
			}
		}
		break;
	case FIND_LAST:
		if (cmp == 0 && b->tkey == 0) {
			/* shift over multiple equals */
			for (diff = (end - cur) >> 1; diff; diff >>= 1) {
				while (cur + diff < end &&
				       atom_EQ(BUNtail(bi, cur + diff), v, b->ttype))
					cur += diff;
			}
		}
		cur += (cmp == 0);
		break;
	default: /* case FIND_ANY -- stupid compiler */
		if (cmp) {
			/* not found */
			cur = BUN_NONE;
		}
		break;
	}
	return cur;
}
Ejemplo n.º 26
0
str
MKEYbathash(bat *res, const bat *bid)
{
	BAT *b, *dst;
	wrd *r;
	BUN n;

	if ((b = BATdescriptor(*bid)) == NULL)
		throw(SQL, "mkey.bathash", RUNTIME_OBJECT_MISSING);

	assert(BAThvoid(b) || BAThrestricted(b));

	n = BATcount(b);
	dst = BATnew(TYPE_void, TYPE_wrd, n, TRANSIENT);
	if (dst == NULL) {
		BBPunfix(b->batCacheid);
		throw(SQL, "mkey.bathash", MAL_MALLOC_FAIL);
	}
	BATseqbase(dst, b->hseqbase);
	BATsetcount(dst, n);

	r = (wrd *) Tloc(dst, BUNfirst(dst));

	switch (ATOMstorage(b->ttype)) {
	case TYPE_void: {
		oid o = b->tseqbase;
		if (o == oid_nil)
			while (n-- > 0)
				*r++ = wrd_nil;
		else
			while (n-- > 0)
				*r++ = (wrd) o++;
		break;
	}
	case TYPE_bte: {
		bte *v = (bte *) Tloc(b, BUNfirst(b));
		while (n-- > 0) {
			*r++ = MKEYHASH_bte(v);
			v++;
		}
		break;
	}
	case TYPE_sht: {
		sht *v = (sht *) Tloc(b, BUNfirst(b));
		while (n-- > 0) {
			*r++ = MKEYHASH_sht(v);
			v++;
		}
		break;
	}
	case TYPE_int:
	case TYPE_flt: {
		int *v = (int *) Tloc(b, BUNfirst(b));
		while (n-- > 0) {
			*r++ = MKEYHASH_int(v);
			v++;
		}
		break;
	}
	case TYPE_lng:
	case TYPE_dbl: {
		lng *v = (lng *) Tloc(b, BUNfirst(b));
		while (n-- > 0) {
			*r++ = MKEYHASH_lng(v);
			v++;
		}
		break;
	}
#ifdef HAVE_HGE
	case TYPE_hge: {
		hge *v = (hge *) Tloc(b, BUNfirst(b));
		while (n-- > 0) {
			*r++ = MKEYHASH_hge(v);
			v++;
		}
		break;
	}
#endif
	default: {
		BATiter bi = bat_iterator(b);
		BUN (*hash)(const void *) = BATatoms[b->ttype].atomHash;
		int (*cmp)(const void *, const void *) = ATOMcompare(b->ttype);
		void *nil = ATOMnilptr(b->ttype);
		BUN i;
		const void *v;

		BATloop(b, i, n) {
			v = BUNtail(bi, i);
			if ((*cmp)(v, nil) == 0)
				*r++ = wrd_nil;
			else
				*r++ = (wrd) (*hash)(v);
		}
		break;
	}
	}
Ejemplo n.º 27
0
str
BKCreuseBAT(bat *ret, const bat *bid, const bat *did)
{
	BAT *b, *d, *bn, *bs;
	oid oidx = 0, bidx, *o, *ol;
	gdk_return res;

	if ((b = BATdescriptor(*bid)) == NULL) {
		throw(MAL, "bat.reuse", RUNTIME_OBJECT_MISSING);
	}
	if ( b->htype != TYPE_void) {
		BBPunfix(b->batCacheid);
		throw(MAL, "bat.reuse", SEMANTIC_TYPE_MISMATCH);
	}
	if ((d = BATdescriptor(*did)) == NULL) {
		BBPunfix(b->batCacheid);
		throw(MAL, "bat.reuse", RUNTIME_OBJECT_MISSING);
	}
	bn= BATnew(b->htype, b->ttype, BATcount(b) - BATcount(d), TRANSIENT);
	if (bn == NULL) {
		BBPunfix(b->batCacheid);
		BBPunfix(d->batCacheid);
		throw(MAL, "bat.reuse", MAL_MALLOC_FAIL );
	}
	res = BATsubsort(&bs, NULL, NULL, d, NULL, NULL, 0, 0);
	BBPunfix(d->batCacheid);
	if (res != GDK_SUCCEED) {
		BBPunfix(b->batCacheid);
		BBPunfix(bn->batCacheid);
		throw(MAL, "bat.reuse", MAL_MALLOC_FAIL );
	}

	oidx = b->hseqbase;
	bidx = oidx + BATcount(b)-1;
	o = (oid*)Tloc(bs, BUNfirst(bs));
	ol= (oid*)Tloc(bs, BUNlast(bs));

	switch(ATOMstorage(b->ttype) ){
	case TYPE_bte: reuseloop(bte); break;
	case TYPE_sht: reuseloop(sht); break;
	case TYPE_int: reuseloop(int); break;
	case TYPE_lng: reuseloop(lng); break;
#ifdef HAVE_HGE
	case TYPE_hge: reuseloop(hge); break;
#endif
	case TYPE_flt: reuseloop(flt); break;
	case TYPE_dbl: reuseloop(dbl); break;
	case TYPE_oid: reuseloop(oid); break;
	case TYPE_str: /* to be done based on its index width */
	default:
		if (ATOMvarsized(bn->ttype)) {
			BUN p = BUNfirst(b);
			BUN q = BUNlast(b);
			BATiter bi = bat_iterator(b);

			for (;p<q; oidx++, p++) {
				if ( *o == oidx ){
					while ( ol > o && ol[-1] == bidx) {
						bidx--;
						q--;
						ol--;
					}
					BUNappend(bn, BUNtail(bi, --q), FALSE);
					o += (o < ol);
					bidx--;
				} else {
					BUNappend(bn, BUNtail(bi, p), FALSE);
				}
			}
		} else {
			switch( b->T->width){
			case 1:reuseloop(bte); break;
			case 2:reuseloop(sht); break;
			case 4:reuseloop(int); break;
			case 8:reuseloop(lng); break;
#ifdef HAVE_HGE
			case 16:reuseloop(hge); break;
#endif
			default:
				throw(MAL, "bat.shrink", "Illegal argument type");
			}
		}
	}

    BATsetcount(bn, BATcount(b) - BATcount(bs));
	BATseqbase(bn, b->hseqbase);
    bn->tsorted = 0;
    bn->trevsorted = 0;
    bn->tdense = 0;
	bn->tkey = b->tkey;

    if (!(bn->batDirty&2)) BATsetaccess(bn, BAT_READ);

	BBPunfix(b->batCacheid);
	BBPunfix(bs->batCacheid);
	BBPkeepref(*ret= bn->batCacheid);
	return MAL_SUCCEED;
}