Beispiel #1
0
/*
 * The routines @emph{ALIGN_synced} and @emph{ALIGN_ordered} allow to
 * simply query the alignment status of the two head columns of two
 * BATs.
 */
int
ALIGNsynced(BAT *b1, BAT *b2)
{
	BATcheck(b1, "ALIGNsynced: bat 1 required", 0);
	BATcheck(b2, "ALIGNsynced: bat 2 required", 0);

	/* first try to prove head columns are not in sync */
	if (BATcount(b1) != BATcount(b2))
		return 0;
	if (ATOMtype(BAThtype(b1)) != ATOMtype(BAThtype(b2)))
		return 0;
	if (BAThvoid(b1) && BAThvoid(b2))
		return (b1->hseqbase == b2->hseqbase);

	/* then try that they are */
	if (b1->batCacheid == b2->batCacheid)
		return 1;	/* same bat. trivial case */
	if (BATcount(b1) == 0)
		return 1;	/* empty bats of same type. trivial case */
	if (b1->halign && b1->halign == b2->halign)
		return 1;	/* columns marked as equal by algorithmics */
	if (VIEWparentcol(b1) && ALIGNsynced(BBPcache(VIEWhparent(b1)), b2))
		return 1;	/* view on same bat --- left recursive def.. */
	if (VIEWparentcol(b2) && ALIGNsynced(b1, BBPcache(VIEWhparent(b2))))
		return 1;	/* view on same bat --- right recursive def.. */

	return 0;		/* we simply don't know */
}
Beispiel #2
0
inline int
findGDKtype(int type)
{
	if (type == TYPE_any || type== TYPE_void)
		return TYPE_void;
	if (isaBatType(type))
		return TYPE_bat;
	return ATOMtype(type);
}
Beispiel #3
0
ptr
ATOMnil(int t)
{
	const void *src = ATOMnilptr(t);
	size_t len = ATOMlen(ATOMtype(t), src);
	ptr dst = GDKmalloc(len);

	if (dst)
		memcpy(dst, src, len);
	return dst;
}
Beispiel #4
0
log_bid 
ebat2real(log_bid b, oid ibase)
{
	/* make a copy of b */
	BAT *o = temp_descriptor(b);
	BAT *c = BATcopy(o, TYPE_void, ATOMtype(o->ttype), TRUE);
	log_bid r;

	BATseqbase(c, ibase );
	c->H->dense = 1;
	r = temp_create(c);
	bat_destroy(c);
	bat_destroy(o);
	return r;
}
Beispiel #5
0
/*
 * The prime routine for the BAT layer is to create a new hash index.
 * Its argument is the element type and the maximum number of BUNs be
 * stored under the hash function.
 */
BAT *
BAThash(BAT *b, BUN masksize)
{
	BAT *o = NULL;
	lng t0,t1;
	(void) t0; 
	(void) t1;

	if (VIEWhparent(b)) {
		bat p = VIEWhparent(b);
		o = b;
		b = BATdescriptor(p);
		if (!ALIGNsynced(o, b) || BUNfirst(o) != BUNfirst(b)) {
			BBPunfix(b->batCacheid);
			b = o;
			o = NULL;
		}
	}
	MT_lock_set(&GDKhashLock(ABS(b->batCacheid)), "BAThash");
	if (b->H->hash == NULL) {
		unsigned int tpe = ATOMstorage(b->htype);
		BUN cnt = BATcount(b);
		BUN mask;
		BUN p = BUNfirst(b), q = BUNlast(b), r;
		Hash *h = NULL;
		Heap *hp = NULL;
		str nme = BBP_physical(b->batCacheid);
		BATiter bi = bat_iterator(b);

		ALGODEBUG fprintf(stderr, "#BAThash: create hash(" BUNFMT ");\n", BATcount(b));
		/* cnt = 0, hopefully there is a proper capacity from
		 * which we can derive enough information */
		if (!cnt)
			cnt = BATcapacity(b);

		if (b->htype == TYPE_void) {
			if (b->hseqbase == oid_nil) {
				MT_lock_unset(&GDKhashLock(ABS(b->batCacheid)), "BAThash");
				ALGODEBUG fprintf(stderr, "#BAThash: cannot create hash-table on void-NIL column.\n");
				return NULL;
			}
			ALGODEBUG fprintf(stderr, "#BAThash: creating hash-table on void column..\n");

			tpe = TYPE_void;
		}
		/* determine hash mask size p = first; then no dynamic
		 * scheme */
		if (masksize > 0) {
			mask = HASHmask(masksize);
		} else if (ATOMsize(ATOMstorage(tpe)) == 1) {
			mask = (1 << 8);
		} else if (ATOMsize(ATOMstorage(tpe)) == 2) {
			mask = (1 << 12);
		} else if (b->hkey) {
			mask = HASHmask(cnt);
		} else {
			/* dynamic hash: we start with
			 * HASHmask(cnt/64); if there are too many
			 * collisions we try HASHmask(cnt/16), then
			 * HASHmask(cnt/4), and finally
			 * HASHmask(cnt).  */
			mask = HASHmask(cnt >> 6);
			p += (cnt >> 2);	/* try out on first 25% of b */
			if (p > q)
				p = q;
		}

		if (mask < 1024)
			mask = 1024;
		t0 = GDKusec();
		do {
			BUN nslots = mask >> 3;	/* 1/8 full is too full */

			r = BUNfirst(b);
			if (hp) {
				HEAPfree(hp);
				GDKfree(hp);
			}
			if (h) {
				ALGODEBUG fprintf(stderr, "#BAThash: retry hash construction\n");
				GDKfree(h);
			}
			/* create the hash structures */
			hp = (Heap *) GDKzalloc(sizeof(Heap));
			if (hp &&
			    (hp->filename = GDKmalloc(strlen(nme) + 12)) != NULL)
				sprintf(hp->filename, "%s.%chash", nme, b->batCacheid > 0 ? 'h' : 't');
			if (hp == NULL ||
			    hp->filename == NULL ||
			    (h = HASHnew(hp, ATOMtype(b->htype), BATcapacity(b), mask)) == NULL) {

				MT_lock_unset(&GDKhashLock(ABS(b->batCacheid)), "BAThash");
				if (hp != NULL) {
					GDKfree(hp->filename);
					GDKfree(hp);
				}
				return NULL;
			}

			switch (tpe) {
			case TYPE_bte:
				starthash(bte);
				break;
			case TYPE_sht:
				starthash(sht);
				break;
			case TYPE_int:
			case TYPE_flt:
				starthash(int);
				break;
			case TYPE_dbl:
			case TYPE_lng:
				starthash(lng);
				break;
			default:
				for (; r < p; r++) {
					ptr v = BUNhead(bi, r);
					BUN c = (BUN) heap_hash_any(b->H->vheap, h, v);

					if ( HASHget(h,c) == HASHnil(h) &&
					    nslots-- == 0)
						break;	/* mask too full */
					HASHputlink(h,r, HASHget(h,c));
					HASHput(h,c, r);
				}
				break;
			}
		} while (r < p && mask < cnt && (mask <<= 2));

		/* finish the hashtable with the current mask */
		p = r;
		switch (tpe) {
		case TYPE_bte:
			finishhash(bte);
			break;
		case TYPE_sht:
			finishhash(sht);
			break;
		case TYPE_int:
		case TYPE_flt:
			finishhash(int);
			break;
		case TYPE_dbl:
		case TYPE_lng:
			finishhash(lng);
			break;
		default:
			for (; p < q; p++) {
				ptr v = BUNhead(bi, p);
				BUN c = (BUN) heap_hash_any(b->H->vheap, h, v);

				HASHputlink(h,p, HASHget(h,c));
				HASHput(h,c,p);
			}
			break;
		}
		b->H->hash = h;
		t1 = GDKusec();
		ALGODEBUG 
				fprintf(stderr, "#BAThash: hash construction "LLFMT" usec\n", t1-t0);
		ALGODEBUG HASHcollisions(b,b->H->hash);
	}
Beispiel #6
0
/*
 * The prime routine for the BAT layer is to create a new hash index.
 * Its argument is the element type and the maximum number of BUNs be
 * stored under the hash function.
 */
gdk_return
BAThash(BAT *b, BUN masksize)
{
	BAT *o = NULL;
	lng t0 = 0, t1 = 0;

	if (BATcheckhash(b)) {
		if (o != NULL) {
			o->T->hash = b->T->hash;
			BBPunfix(b->batCacheid);
		}
		return GDK_SUCCEED;
	}
	MT_lock_set(&GDKhashLock(abs(b->batCacheid)), "BAThash");
	if (b->T->hash == NULL) {
		unsigned int tpe = ATOMbasetype(b->ttype);
		BUN cnt = BATcount(b);
		BUN mask, maxmask = 0;
		BUN p = BUNfirst(b), q = BUNlast(b), r;
		Hash *h = NULL;
		Heap *hp;
		const char *nme = BBP_physical(b->batCacheid);
		const char *ext = b->batCacheid > 0 ? "thash" : "hhash";
		BATiter bi = bat_iterator(b);
#ifdef PERSISTENTHASH
		int fd;
#endif

		ALGODEBUG fprintf(stderr, "#BAThash: create hash(" BUNFMT ");\n", BATcount(b));
		if ((hp = GDKzalloc(sizeof(*hp))) == NULL ||
		    (hp->farmid = BBPselectfarm(b->batRole, b->ttype, hashheap)) < 0 ||
		    (hp->filename = GDKmalloc(strlen(nme) + 12)) == NULL) {
			MT_lock_unset(&GDKhashLock(abs(b->batCacheid)), "BAThash");
			GDKfree(hp);
			return GDK_FAIL;
		}
		sprintf(hp->filename, "%s.%s", nme, ext);

		/* cnt = 0, hopefully there is a proper capacity from
		 * which we can derive enough information */
		if (!cnt)
			cnt = BATcapacity(b);

		if (b->ttype == TYPE_void) {
			if (b->tseqbase == oid_nil) {
				MT_lock_unset(&GDKhashLock(abs(b->batCacheid)), "BAThash");
				ALGODEBUG fprintf(stderr, "#BAThash: cannot create hash-table on void-NIL column.\n");
				GDKfree(hp->filename);
				GDKfree(hp);
				return GDK_FAIL;
			}
			ALGODEBUG fprintf(stderr, "#BAThash: creating hash-table on void column..\n");

			tpe = TYPE_void;
		}
		/* determine hash mask size p = first; then no dynamic
		 * scheme */
		if (masksize > 0) {
			mask = HASHmask(masksize);
		} else if (ATOMsize(tpe) == 1) {
			mask = (1 << 8);
		} else if (ATOMsize(tpe) == 2) {
			mask = (1 << 16);
		} else if (b->tkey) {
			mask = HASHmask(cnt);
		} else {
			/* dynamic hash: we start with
			 * HASHmask(cnt)/64; if there are too many
			 * collisions we try HASHmask(cnt)/16, then
			 * HASHmask(cnt)/4, and finally
			 * HASHmask(cnt).  */
			maxmask = HASHmask(cnt);
			mask = maxmask >> 6;
			p += (cnt >> 2);	/* try out on first 25% of b */
			if (p > q)
				p = q;
		}

		t0 = GDKusec();

		do {
			BUN nslots = mask >> 3;	/* 1/8 full is too full */

			r = BUNfirst(b);
			if (h) {
				char *fnme;
				bte farmid;

				ALGODEBUG fprintf(stderr, "#BAThash: retry hash construction\n");
				fnme = GDKstrdup(hp->filename);
				farmid = hp->farmid;
				HEAPfree(hp, 1);
				memset(hp, 0, sizeof(*hp));
				hp->filename = fnme;
				hp->farmid = farmid;
				GDKfree(h);
				h = NULL;
			}
			/* create the hash structures */
			if ((h = HASHnew(hp, ATOMtype(b->ttype), BATcapacity(b), mask, BATcount(b))) == NULL) {

				MT_lock_unset(&GDKhashLock(abs(b->batCacheid)), "BAThash");
				GDKfree(hp->filename);
				GDKfree(hp);
				return GDK_FAIL;
			}

			switch (tpe) {
			case TYPE_bte:
				starthash(bte);
				break;
			case TYPE_sht:
				starthash(sht);
				break;
			case TYPE_int:
			case TYPE_flt:
#if SIZEOF_OID == SIZEOF_INT
			case TYPE_oid:
#endif
#if SIZEOF_WRD == SIZEOF_INT
			case TYPE_wrd:
#endif
				starthash(int);
				break;
			case TYPE_dbl:
			case TYPE_lng:
#if SIZEOF_OID == SIZEOF_LNG
			case TYPE_oid:
#endif
#if SIZEOF_WRD == SIZEOF_LNG
			case TYPE_wrd:
#endif
				starthash(lng);
				break;
#ifdef HAVE_HGE
			case TYPE_hge:
				starthash(hge);
				break;
#endif
			default:
				for (; r < p; r++) {
					ptr v = BUNtail(bi, r);
					BUN c = (BUN) heap_hash_any(b->T->vheap, h, v);

					if (HASHget(h, c) == HASHnil(h) &&
					    nslots-- == 0)
						break;	/* mask too full */
					HASHputlink(h, r, HASHget(h, c));
					HASHput(h, c, r);
				}
				break;
			}
		} while (r < p && mask < maxmask && (mask <<= 2));

		/* finish the hashtable with the current mask */
		p = r;
		switch (tpe) {
		case TYPE_bte:
			finishhash(bte);
			break;
		case TYPE_sht:
			finishhash(sht);
			break;
		case TYPE_int:
		case TYPE_flt:
#if SIZEOF_OID == SIZEOF_INT
		case TYPE_oid:
#endif
#if SIZEOF_WRD == SIZEOF_INT
		case TYPE_wrd:
#endif
			finishhash(int);
			break;
		case TYPE_dbl:
		case TYPE_lng:
#if SIZEOF_OID == SIZEOF_LNG
		case TYPE_oid:
#endif
#if SIZEOF_WRD == SIZEOF_LNG
		case TYPE_wrd:
#endif
			finishhash(lng);
			break;
#ifdef HAVE_HGE
		case TYPE_hge:
			finishhash(hge);
			break;
#endif
		default:
			for (; p < q; p++) {
				ptr v = BUNtail(bi, p);
				BUN c = (BUN) heap_hash_any(b->T->vheap, h, v);

				HASHputlink(h, p, HASHget(h, c));
				HASHput(h, c, p);
			}
			break;
		}
#ifdef PERSISTENTHASH
		if ((BBP_status(b->batCacheid) & BBPEXISTING) &&
		    b->batInserted == b->batCount &&
		    HEAPsave(hp, nme, ext) == GDK_SUCCEED &&
		    (fd = GDKfdlocate(hp->farmid, nme, "rb+", ext)) >= 0) {
			ALGODEBUG fprintf(stderr, "#BAThash: persisting hash %d\n", b->batCacheid);
			((size_t *) hp->base)[0] |= 1 << 24;
			if (write(fd, hp->base, SIZEOF_SIZE_T) < 0)
				perror("write hash");
			if (!(GDKdebug & FORCEMITOMASK)) {
#if defined(NATIVE_WIN32)
				_commit(fd);
#elif defined(HAVE_FDATASYNC)
				fdatasync(fd);
#elif defined(HAVE_FSYNC)
				fsync(fd);
#endif
			}
			close(fd);
		} else
			ALGODEBUG fprintf(stderr, "#BAThash: NOT persisting hash %d\n", b->batCacheid);
#endif
		b->T->hash = h;
		t1 = GDKusec();
		ALGODEBUG fprintf(stderr, "#BAThash: hash construction " LLFMT " usec\n", t1 - t0);
		ALGODEBUG HASHcollisions(b, b->T->hash);
	}
Beispiel #7
0
/* return TRUE if we have a hash on the tail, even if we need to read
 * one from disk */
int
BATcheckhash(BAT *b)
{
	int ret;
	lng t;

	t = GDKusec();
	MT_lock_set(&GDKhashLock(abs(b->batCacheid)), "BATcheckhash");
	t = GDKusec() - t;
// use or ignore a persistent hash
#ifdef PERSISTENTHASH
	if (b->T->hash == NULL) {
		Hash *h;
		Heap *hp;
		const char *nme = BBP_physical(b->batCacheid);
		const char *ext = b->batCacheid > 0 ? "thash" : "hhash";
		int fd;

		if ((hp = GDKzalloc(sizeof(*hp))) != NULL &&
		    (hp->farmid = BBPselectfarm(b->batRole, b->ttype, hashheap)) >= 0 &&
		    (hp->filename = GDKmalloc(strlen(nme) + 12)) != NULL) {
			sprintf(hp->filename, "%s.%s", nme, ext);

			/* check whether a persisted hash can be found */
			if ((fd = GDKfdlocate(hp->farmid, nme, "rb+", ext)) >= 0) {
				size_t hdata[HASH_HEADER_SIZE];
				struct stat st;

				if ((h = GDKmalloc(sizeof(*h))) != NULL &&
				    read(fd, hdata, sizeof(hdata)) == sizeof(hdata) &&
				    hdata[0] == (((size_t) 1 << 24) | HASH_VERSION) &&
				    hdata[4] == (size_t) BATcount(b) &&
				    fstat(fd, &st) == 0 &&
				    st.st_size >= (off_t) (hp->size = hp->free = (hdata[1] + hdata[2]) * hdata[3] + HASH_HEADER_SIZE * SIZEOF_SIZE_T) &&
				    HEAPload(hp, nme, ext, 0) == GDK_SUCCEED) {
					h->lim = (BUN) hdata[1];
					h->type = ATOMtype(b->ttype);
					h->mask = (BUN) (hdata[2] - 1);
					h->heap = hp;
					h->width = (int) hdata[3];
					switch (h->width) {
					case BUN2:
						h->nil = (BUN) BUN2_NONE;
						break;
					case BUN4:
						h->nil = (BUN) BUN4_NONE;
						break;
#ifdef BUN8
					case BUN8:
						h->nil = (BUN) BUN8_NONE;
						break;
#endif
					default:
						assert(0);
					}
					h->Link = hp->base + HASH_HEADER_SIZE * SIZEOF_SIZE_T;
					h->Hash = (void *) ((char *) h->Link + h->lim * h->width);
					close(fd);
					b->T->hash = h;
					ALGODEBUG fprintf(stderr, "#BATcheckhash: reusing persisted hash %d\n", b->batCacheid);
					MT_lock_unset(&GDKhashLock(abs(b->batCacheid)), "BATcheckhash");
					return 1;
				}
				GDKfree(h);
				close(fd);
				/* unlink unusable file */
				GDKunlink(hp->farmid, BATDIR, nme, ext);
			}
			GDKfree(hp->filename);
		}
		GDKfree(hp);
	}
#endif
	ret = b->T->hash != NULL;
	MT_lock_unset(&GDKhashLock(abs(b->batCacheid)), "BATcheckhash");
	ALGODEBUG if (ret) fprintf(stderr, "#BATcheckhash: already has hash %d, waited " LLFMT " usec\n", b->batCacheid, t);
	return ret;
}
Beispiel #8
0
	int type() const {
		return ATOMtype(get()->T.type);
	}