/*
 * Pick the projection worker that fits the tail type of the first part
 * and run it over all parts.
 *
 * map   passed through unchanged to the selected worker
 * bats  array of `len` parts; only bats[0] is inspected to choose the worker
 * len   number of entries in bats
 *
 * Returns the BAT produced by the worker, or NULL when the tail storage
 * type is TYPE_void or lower, or when the worker itself returned NULL.
 * A non-NULL result is marked as neither sorted nor reverse sorted and its
 * nonil property is taken from MATnonil(bats, len).
 */
static BAT *
MATproject_( BAT *map, BAT **bats, int len )
{
	int tt = bats[0]->ttype;
	BAT *bn;

	/* void (or lower) storage cannot be projected: report failure */
	if (ATOMstorage(tt) <= TYPE_void)
		return NULL;

	if (ATOMvarsized(tt)) {
		bn = MATproject_var(map, bats, len);
	} else {
		/* fixed-width tail: select the worker by element width */
		size_t width = (size_t) ATOMsize(tt);

		if (width == sizeof(bte))
			bn = MATproject_bte(map, bats, len, tt);
		else if (width == sizeof(sht))
			bn = MATproject_sht(map, bats, len, tt);
		else if (width == sizeof(int))
			bn = MATproject_int(map, bats, len, tt);
		else if (width == sizeof(lng))
			bn = MATproject_lng(map, bats, len, tt);
#ifdef HAVE_HGE
		else if (width == sizeof(hge))
			bn = MATproject_hge(map, bats, len, tt);
#endif
		else
			bn = MATproject_any(map, bats, len);
	}

	if (bn == NULL)
		return NULL;

	/* the result order is not tracked here, so drop both order flags */
	bn->tsorted = 0;
	bn->trevsorted = 0;
	bn->T->nonil = MATnonil(bats, len);
	return bn;
}
/*
 * User defined modules may introduce fixed sized types
 * to store information in BATs.
 *
 * Looks up the atom called `name`, makes it its own storage type, records
 * `size` as its width and asserts that the element shift derived from that
 * width is consistent with it.  Returns the index obtained from
 * ATOMindex(name).
 */
int
malAtomSize(int size, const char *name)
{
	int idx = ATOMindex(name);

	BATatoms[idx].size = size;
	BATatoms[idx].storage = idx;
	assert_shift_width(ATOMelmshift(ATOMsize(idx)), ATOMsize(idx));
	return idx;
}
/*
 * @- Atomic ADT functions
 */

/*
 * Length of the value `src` of atom type `t`: the result of the atom's own
 * atomLen callback when one is registered, otherwise the atom's size as
 * given by ATOMsize(t).
 */
size_t
ATOMlen(int t, const void *src)
{
	size_t (*lenfcn)(const void *) = BATatoms[t].atomLen;

	if (lenfcn == NULL)
		return ATOMsize(t);
	return lenfcn(src);
}
/*
 * Install the implementation behind instruction `pci` as one of the
 * callbacks (or properties) of an atom type.
 *
 * The atom is found by looking up the module name of `pci` as a type name;
 * the function name of `pci` selects which slot of the BATatoms entry is
 * filled: del, cmp, fromstr, fix, heap, hash, length, null, nequal, put,
 * storage, tostr, unfix, read or write.  A slot is only filled when the
 * instruction has exactly one argument.  For "null" and "storage" the
 * function is called right away and its result is stored.
 *
 * mb is unused.
 *
 * Returns 1 when a slot was filled (setAtomName(pci) is applied in that
 * case), 0 when the type index is out of range or the function name is not
 * one of the recognised properties.
 */
int
malAtomProperty(MalBlkPtr mb, InstrPtr pci)
{
	str name;
	int tpe;
	int single;		/* instruction has exactly one argument */
	int installed = 0;

	(void) mb;		/* fool compilers */
	assert(pci != 0);
	name = getFunctionId(pci);
	tpe = getTypeIndex(getModuleId(pci), (int) strlen(getModuleId(pci)), TYPE_any);
	if (tpe < 0 || tpe >= GDKatomcnt || tpe >= MAXATOMS)
		return 0;
	assert(pci->fcn != NULL);
	single = (pci->argc == 1);

	/* the first character narrows down which names need comparing */
	switch (name[0]) {
	case 'c':
		if (single && idcmp("cmp", name) == 0) {
			BATatoms[tpe].atomCmp = (int (*)(const void *, const void *)) pci->fcn;
			BATatoms[tpe].linear = 1;
			installed = 1;
		}
		break;
	case 'd':
		if (single && idcmp("del", name) == 0) {
			BATatoms[tpe].atomDel = (void (*)(Heap *, var_t *)) pci->fcn;
			installed = 1;
		}
		break;
	case 'f':
		if (single && idcmp("fromstr", name) == 0) {
			BATatoms[tpe].atomFromStr = (int (*)(const char *, int *, ptr *)) pci->fcn;
			installed = 1;
		} else if (single && idcmp("fix", name) == 0) {
			BATatoms[tpe].atomFix = (int (*)(const void *)) pci->fcn;
			installed = 1;
		}
		break;
	case 'h':
		if (single && idcmp("heap", name) == 0) {
			/* heap function makes an atom varsized */
			BATatoms[tpe].size = sizeof(var_t);
			assert_shift_width(ATOMelmshift(ATOMsize(tpe)), ATOMsize(tpe));
			BATatoms[tpe].align = sizeof(var_t);
			BATatoms[tpe].atomHeap = (void (*)(Heap *, size_t)) pci->fcn;
			installed = 1;
		} else if (single && idcmp("hash", name) == 0) {
			BATatoms[tpe].atomHash = (BUN (*)(const void *)) pci->fcn;
			installed = 1;
		}
		break;
	case 'l':
		if (single && idcmp("length", name) == 0) {
			BATatoms[tpe].atomLen = (int (*)(const void *)) pci->fcn;
			installed = 1;
		}
		break;
	case 'n':
		if (single && idcmp("null", name) == 0) {
			/* the nil value is fetched once, now, and remembered */
			ptr atmnull = ((ptr (*)(void)) pci->fcn)();

			BATatoms[tpe].atomNull = atmnull;
			installed = 1;
		} else if (single && idcmp("nequal", name) == 0) {
			BATatoms[tpe].atomCmp = (int (*)(const void *, const void *)) pci->fcn;
			installed = 1;
		}
		break;
	case 'p':
		if (single && idcmp("put", name) == 0) {
			BATatoms[tpe].atomPut = (var_t (*)(Heap *, var_t *, const void *)) pci->fcn;
			installed = 1;
		}
		break;
	case 'r':
		if (single && idcmp("read", name) == 0) {
			BATatoms[tpe].atomRead = (void *(*)(void *, stream *, size_t)) pci->fcn;
			installed = 1;
		}
		break;
	case 's':
		if (single && idcmp("storage", name) == 0) {
			/* the storage type is whatever the function reports */
			BATatoms[tpe].storage = (*(int (*)(void)) pci->fcn)();
			installed = 1;
		}
		break;
	case 't':
		if (single && idcmp("tostr", name) == 0) {
			BATatoms[tpe].atomToStr = (int (*)(str *, int *, const void *)) pci->fcn;
			installed = 1;
		}
		break;
	case 'u':
		if (single && idcmp("unfix", name) == 0) {
			BATatoms[tpe].atomUnfix = (int (*)(const void *)) pci->fcn;
			installed = 1;
		}
		break;
	case 'w':
		if (single && idcmp("write", name) == 0) {
			BATatoms[tpe].atomWrite = (gdk_return (*)(const void *, stream *, size_t)) pci->fcn;
			installed = 1;
		}
		break;
	}

	if (installed) {
		setAtomName(pci);
	}
	return installed;
}
/* * The prime routine for the BAT layer is to create a new hash index. * Its argument is the element type and the maximum number of BUNs be * stored under the hash function. */ BAT * BAThash(BAT *b, BUN masksize) { BAT *o = NULL; lng t0,t1; (void) t0; (void) t1; if (VIEWhparent(b)) { bat p = VIEWhparent(b); o = b; b = BATdescriptor(p); if (!ALIGNsynced(o, b) || BUNfirst(o) != BUNfirst(b)) { BBPunfix(b->batCacheid); b = o; o = NULL; } } MT_lock_set(&GDKhashLock(ABS(b->batCacheid)), "BAThash"); if (b->H->hash == NULL) { unsigned int tpe = ATOMstorage(b->htype); BUN cnt = BATcount(b); BUN mask; BUN p = BUNfirst(b), q = BUNlast(b), r; Hash *h = NULL; Heap *hp = NULL; str nme = BBP_physical(b->batCacheid); BATiter bi = bat_iterator(b); ALGODEBUG fprintf(stderr, "#BAThash: create hash(" BUNFMT ");\n", BATcount(b)); /* cnt = 0, hopefully there is a proper capacity from * which we can derive enough information */ if (!cnt) cnt = BATcapacity(b); if (b->htype == TYPE_void) { if (b->hseqbase == oid_nil) { MT_lock_unset(&GDKhashLock(ABS(b->batCacheid)), "BAThash"); ALGODEBUG fprintf(stderr, "#BAThash: cannot create hash-table on void-NIL column.\n"); return NULL; } ALGODEBUG fprintf(stderr, "#BAThash: creating hash-table on void column..\n"); tpe = TYPE_void; } /* determine hash mask size p = first; then no dynamic * scheme */ if (masksize > 0) { mask = HASHmask(masksize); } else if (ATOMsize(ATOMstorage(tpe)) == 1) { mask = (1 << 8); } else if (ATOMsize(ATOMstorage(tpe)) == 2) { mask = (1 << 12); } else if (b->hkey) { mask = HASHmask(cnt); } else { /* dynamic hash: we start with * HASHmask(cnt/64); if there are too many * collisions we try HASHmask(cnt/16), then * HASHmask(cnt/4), and finally * HASHmask(cnt). 
*/ mask = HASHmask(cnt >> 6); p += (cnt >> 2); /* try out on first 25% of b */ if (p > q) p = q; } if (mask < 1024) mask = 1024; t0 = GDKusec(); do { BUN nslots = mask >> 3; /* 1/8 full is too full */ r = BUNfirst(b); if (hp) { HEAPfree(hp); GDKfree(hp); } if (h) { ALGODEBUG fprintf(stderr, "#BAThash: retry hash construction\n"); GDKfree(h); } /* create the hash structures */ hp = (Heap *) GDKzalloc(sizeof(Heap)); if (hp && (hp->filename = GDKmalloc(strlen(nme) + 12)) != NULL) sprintf(hp->filename, "%s.%chash", nme, b->batCacheid > 0 ? 'h' : 't'); if (hp == NULL || hp->filename == NULL || (h = HASHnew(hp, ATOMtype(b->htype), BATcapacity(b), mask)) == NULL) { MT_lock_unset(&GDKhashLock(ABS(b->batCacheid)), "BAThash"); if (hp != NULL) { GDKfree(hp->filename); GDKfree(hp); } return NULL; } switch (tpe) { case TYPE_bte: starthash(bte); break; case TYPE_sht: starthash(sht); break; case TYPE_int: case TYPE_flt: starthash(int); break; case TYPE_dbl: case TYPE_lng: starthash(lng); break; default: for (; r < p; r++) { ptr v = BUNhead(bi, r); BUN c = (BUN) heap_hash_any(b->H->vheap, h, v); if ( HASHget(h,c) == HASHnil(h) && nslots-- == 0) break; /* mask too full */ HASHputlink(h,r, HASHget(h,c)); HASHput(h,c, r); } break; } } while (r < p && mask < cnt && (mask <<= 2)); /* finish the hashtable with the current mask */ p = r; switch (tpe) { case TYPE_bte: finishhash(bte); break; case TYPE_sht: finishhash(sht); break; case TYPE_int: case TYPE_flt: finishhash(int); break; case TYPE_dbl: case TYPE_lng: finishhash(lng); break; default: for (; p < q; p++) { ptr v = BUNhead(bi, p); BUN c = (BUN) heap_hash_any(b->H->vheap, h, v); HASHputlink(h,p, HASHget(h,c)); HASHput(h,c,p); } break; } b->H->hash = h; t1 = GDKusec(); ALGODEBUG fprintf(stderr, "#BAThash: hash construction "LLFMT" usec\n", t1-t0); ALGODEBUG HASHcollisions(b,b->H->hash); }
/*
 * Parse a decimal integer from the text `src` into the integer atom type
 * `tp` and store it in the buffer that *dst points to.
 *
 * src       text to parse
 * len, dst  output buffer and its size; handed to atommem(sz) before
 *           anything is written (atommem is a macro defined elsewhere --
 *           presumably it makes sure *dst can hold sz bytes and may return
 *           from this function on failure; confirm against its definition)
 * tp        atom type; its ATOMsize selects the target width (1, 2, 4 or
 *           8 bytes, plus 16 when HAVE_HGE is defined)
 * external  when true, the literal text "nil" is accepted and yields the
 *           nil value of tp
 *
 * Returns 1 when src is the string nil (nil is stored), the number of
 * characters consumed (leading and trailing white space included) on
 * success, and -1 on a syntax or overflow error.  On error GDKerror has
 * been called and the nil value of tp has been stored in *dst.
 */
static ssize_t
numFromStr(const char *src, size_t *len, void **dst, int tp, bool external)
{
	const char *p = src;
	size_t sz = ATOMsize(tp);
	/* the magnitude is accumulated in the widest integer type available;
	 * the sign is only applied after all digits have been read */
#ifdef HAVE_HGE
	hge base = 0;
#else
	lng base = 0;
#endif
	int sign = 1;

	/* a valid number has the following syntax:
	 * [-+]?[0-9]+([eE][0-9]+)?(LL)? -- PCRE syntax, or in other words
	 * optional sign, one or more digits, optional exponent, optional LL
	 * the exponent has the following syntax:
	 * lower or upper case letter E, one or more digits
	 * embedded spaces are not allowed
	 * the optional LL at the end are only allowed for lng and hge
	 * values */
	atommem(sz);

	if (GDK_STRNIL(src)) {
		memcpy(*dst, ATOMnilptr(tp), sz);
		return 1;
	}

	/* skip leading white space */
	while (GDKisspace(*p))
		p++;
	if (!num10(*p)) {
		/* not a digit: only "nil" (external input) or a sign may
		 * appear here; any other character leaves the switch untouched
		 * and is rejected by the digit check that follows it */
		switch (*p) {
		case 'n':
			if (external) {
				/* nil is stored before the rest of the word is
				 * checked; if the word is not "nil" we fall
				 * through to the error below, where bailout
				 * stores nil again */
				memcpy(*dst, ATOMnilptr(tp), sz);
				if (p[1] == 'i' && p[2] == 'l') {
					p += 3;
					return (ssize_t) (p - src);
				}
			}
			GDKerror("not a number");
			goto bailout;
		case '-':
			sign = -1;
			p++;
			break;
		case '+':
			p++;
			break;
		}
		/* a sign must be followed directly by a digit */
		if (!num10(*p)) {
			GDKerror("not a number");
			goto bailout;
		}
	}
	/* accumulate the digits, checking before every step that
	 * 10 * base + dig still fits (maxdiv and maxmod10 are defined
	 * elsewhere; from their use here maxdiv[1].maxval looks like the
	 * largest value that may still be multiplied by 10 and maxmod10 like
	 * the largest last digit allowed at that value -- confirm) */
	do {
		int dig = base10(*p);
		if (base > maxdiv[1].maxval ||
		    (base == maxdiv[1].maxval && dig > maxmod10)) {
			/* overflow */
			goto overflow;
		}
		base = 10 * base + dig;
		p++;
	} while (num10(*p));
	/* an 'e' or 'E' only starts an exponent when a digit follows it;
	 * otherwise it is left unconsumed */
	if ((*p == 'e' || *p == 'E') && num10(p[1])) {
		p++;
		if (base == 0) {
			/* if base is 0, any exponent will do, the
			 * result is still 0 */
			while (num10(*p))
				p++;
		} else {
			int exp = 0;
			do {
				/* this calculation cannot overflow */
				exp = exp * 10 + base10(*p);
				/* exp is used as an index into maxdiv, so it
				 * has to stay below the table size */
				if (exp >= (int) (sizeof(maxdiv) / sizeof(maxdiv[0]))) {
					/* overflow */
					goto overflow;
				}
				p++;
			} while (num10(*p));
			if (base > maxdiv[exp].maxval) {
				/* overflow */
				goto overflow;
			}
			base *= maxdiv[exp].scale;
		}
	}
	base *= sign;
	/* range check against the target width and store; there is no
	 * default case, so for any other size nothing is stored */
	switch (sz) {
	case 1: {
		bte **dstbte = (bte **) dst;
		if (base < GDK_bte_min || base > GDK_bte_max) {
			goto overflow;
		}
		**dstbte = (bte) base;
		break;
	}
	case 2: {
		sht **dstsht = (sht **) dst;
		if (base < GDK_sht_min || base > GDK_sht_max) {
			goto overflow;
		}
		**dstsht = (sht) base;
		break;
	}
	case 4: {
		int **dstint = (int **) dst;
		if (base < GDK_int_min || base > GDK_int_max) {
			goto overflow;
		}
		**dstint = (int) base;
		break;
	}
	case 8: {
		lng **dstlng = (lng **) dst;
		/* the range check is only compiled in when base is an hge */
#ifdef HAVE_HGE
		if (base < GDK_lng_min || base > GDK_lng_max) {
			goto overflow;
		}
#endif
		**dstlng = (lng) base;
		/* optional LL suffix */
		if (p[0] == 'L' && p[1] == 'L')
			p += 2;
		break;
	}
#ifdef HAVE_HGE
	case 16: {
		hge **dsthge = (hge **) dst;
		**dsthge = (hge) base;
		/* optional LL suffix */
		if (p[0] == 'L' && p[1] == 'L')
			p += 2;
		break;
	}
#endif
	}
	/* trailing white space counts as consumed input */
	while (GDKisspace(*p))
		p++;
	return (ssize_t) (p - src);

  overflow:
	/* skip the remaining digits so that the message shows them too */
	while (num10(*p))
		p++;
	GDKerror("overflow: \"%.*s\" does not fit in %s\n",
		 (int) (p - src), src, ATOMname(tp));
  bailout:
	/* every error path leaves the nil value in the output buffer */
	memcpy(*dst, ATOMnilptr(tp), sz);
	return -1;
}
static str MATsort(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci, int rev) { bat *res_id = (bat*) getArgReference(stk,pci,0); /* result sorted */ bat *map_id = (bat*) getArgReference(stk,pci,1); /* result map */ BAT *res = NULL, *map = NULL; /* rest of the args are sorted parts, (excluding sorted and map) */ BAT **bats = GDKzalloc(sizeof(BAT*) * pci->argc - 2); BUN pcnt = 0; int i, len = pci->argc-2; (void) cntxt; (void) mb; (void) stk; if( bats == NULL) throw(SQL, "mat.sortTail",MAL_MALLOC_FAIL); for (i=2; i<pci->argc; i++) { bat id = *(bat*) getArgReference(stk,pci,i); bats[i-2] = BATdescriptor(id); if (!bats[i-2]) goto error; pcnt += BATcount(bats[i-2]); } if (ATOMstorage(bats[0]->ttype) <= TYPE_void) { /*error*/ } else if (ATOMvarsized(bats[0]->ttype)) { res = MATsort_any(&map, bats, len, pcnt, rev); } else if (ATOMsize(bats[0]->ttype) == sizeof(bte)) { res = MATsort_bte(&map, bats, len, pcnt, rev); } else if (ATOMsize(bats[0]->ttype) == sizeof(sht)) { res = MATsort_sht(&map, bats, len, pcnt, rev); } else if (ATOMsize(bats[0]->ttype) == sizeof(int)) { res = MATsort_int(&map, bats, len, pcnt, rev); } else if (ATOMsize(bats[0]->ttype) == sizeof(lng)) { res = MATsort_lng(&map, bats, len, pcnt, rev); #ifdef HAVE_HGE } else if (ATOMsize(bats[0]->ttype) == sizeof(hge)) { res = MATsort_hge(&map, bats, len, pcnt, rev); #endif } else { res = MATsort_any(&map, bats, len, pcnt, rev); } if (res) { res->T->nonil = MATnonil(bats, len); if (rev) { res->trevsorted = 1; res->tsorted = res->batCount <= 1; } else { res->tsorted = 1; res->trevsorted = res->batCount <= 1; } } error: for (i=0; i<len && bats[i]; i++) BBPunfix(bats[i]->batCacheid); GDKfree(bats); if (map && res) { map->tsorted = 0; map->trevsorted = 0; BBPkeepref( *map_id = map->batCacheid); BBPkeepref( *res_id = res->batCacheid); return MAL_SUCCEED; } if (map) BBPunfix(map->batCacheid); throw(SQL, "mat.sortTail","Cannot access descriptor"); }
/* * The prime routine for the BAT layer is to create a new hash index. * Its argument is the element type and the maximum number of BUNs be * stored under the hash function. */ gdk_return BAThash(BAT *b, BUN masksize) { BAT *o = NULL; lng t0 = 0, t1 = 0; if (BATcheckhash(b)) { if (o != NULL) { o->T->hash = b->T->hash; BBPunfix(b->batCacheid); } return GDK_SUCCEED; } MT_lock_set(&GDKhashLock(abs(b->batCacheid)), "BAThash"); if (b->T->hash == NULL) { unsigned int tpe = ATOMbasetype(b->ttype); BUN cnt = BATcount(b); BUN mask, maxmask = 0; BUN p = BUNfirst(b), q = BUNlast(b), r; Hash *h = NULL; Heap *hp; const char *nme = BBP_physical(b->batCacheid); const char *ext = b->batCacheid > 0 ? "thash" : "hhash"; BATiter bi = bat_iterator(b); #ifdef PERSISTENTHASH int fd; #endif ALGODEBUG fprintf(stderr, "#BAThash: create hash(" BUNFMT ");\n", BATcount(b)); if ((hp = GDKzalloc(sizeof(*hp))) == NULL || (hp->farmid = BBPselectfarm(b->batRole, b->ttype, hashheap)) < 0 || (hp->filename = GDKmalloc(strlen(nme) + 12)) == NULL) { MT_lock_unset(&GDKhashLock(abs(b->batCacheid)), "BAThash"); GDKfree(hp); return GDK_FAIL; } sprintf(hp->filename, "%s.%s", nme, ext); /* cnt = 0, hopefully there is a proper capacity from * which we can derive enough information */ if (!cnt) cnt = BATcapacity(b); if (b->ttype == TYPE_void) { if (b->tseqbase == oid_nil) { MT_lock_unset(&GDKhashLock(abs(b->batCacheid)), "BAThash"); ALGODEBUG fprintf(stderr, "#BAThash: cannot create hash-table on void-NIL column.\n"); GDKfree(hp->filename); GDKfree(hp); return GDK_FAIL; } ALGODEBUG fprintf(stderr, "#BAThash: creating hash-table on void column..\n"); tpe = TYPE_void; } /* determine hash mask size p = first; then no dynamic * scheme */ if (masksize > 0) { mask = HASHmask(masksize); } else if (ATOMsize(tpe) == 1) { mask = (1 << 8); } else if (ATOMsize(tpe) == 2) { mask = (1 << 16); } else if (b->tkey) { mask = HASHmask(cnt); } else { /* dynamic hash: we start with * HASHmask(cnt)/64; if there are too many * 
collisions we try HASHmask(cnt)/16, then * HASHmask(cnt)/4, and finally * HASHmask(cnt). */ maxmask = HASHmask(cnt); mask = maxmask >> 6; p += (cnt >> 2); /* try out on first 25% of b */ if (p > q) p = q; } t0 = GDKusec(); do { BUN nslots = mask >> 3; /* 1/8 full is too full */ r = BUNfirst(b); if (h) { char *fnme; bte farmid; ALGODEBUG fprintf(stderr, "#BAThash: retry hash construction\n"); fnme = GDKstrdup(hp->filename); farmid = hp->farmid; HEAPfree(hp, 1); memset(hp, 0, sizeof(*hp)); hp->filename = fnme; hp->farmid = farmid; GDKfree(h); h = NULL; } /* create the hash structures */ if ((h = HASHnew(hp, ATOMtype(b->ttype), BATcapacity(b), mask, BATcount(b))) == NULL) { MT_lock_unset(&GDKhashLock(abs(b->batCacheid)), "BAThash"); GDKfree(hp->filename); GDKfree(hp); return GDK_FAIL; } switch (tpe) { case TYPE_bte: starthash(bte); break; case TYPE_sht: starthash(sht); break; case TYPE_int: case TYPE_flt: #if SIZEOF_OID == SIZEOF_INT case TYPE_oid: #endif #if SIZEOF_WRD == SIZEOF_INT case TYPE_wrd: #endif starthash(int); break; case TYPE_dbl: case TYPE_lng: #if SIZEOF_OID == SIZEOF_LNG case TYPE_oid: #endif #if SIZEOF_WRD == SIZEOF_LNG case TYPE_wrd: #endif starthash(lng); break; #ifdef HAVE_HGE case TYPE_hge: starthash(hge); break; #endif default: for (; r < p; r++) { ptr v = BUNtail(bi, r); BUN c = (BUN) heap_hash_any(b->T->vheap, h, v); if (HASHget(h, c) == HASHnil(h) && nslots-- == 0) break; /* mask too full */ HASHputlink(h, r, HASHget(h, c)); HASHput(h, c, r); } break; } } while (r < p && mask < maxmask && (mask <<= 2)); /* finish the hashtable with the current mask */ p = r; switch (tpe) { case TYPE_bte: finishhash(bte); break; case TYPE_sht: finishhash(sht); break; case TYPE_int: case TYPE_flt: #if SIZEOF_OID == SIZEOF_INT case TYPE_oid: #endif #if SIZEOF_WRD == SIZEOF_INT case TYPE_wrd: #endif finishhash(int); break; case TYPE_dbl: case TYPE_lng: #if SIZEOF_OID == SIZEOF_LNG case TYPE_oid: #endif #if SIZEOF_WRD == SIZEOF_LNG case TYPE_wrd: #endif 
finishhash(lng); break; #ifdef HAVE_HGE case TYPE_hge: finishhash(hge); break; #endif default: for (; p < q; p++) { ptr v = BUNtail(bi, p); BUN c = (BUN) heap_hash_any(b->T->vheap, h, v); HASHputlink(h, p, HASHget(h, c)); HASHput(h, c, p); } break; } #ifdef PERSISTENTHASH if ((BBP_status(b->batCacheid) & BBPEXISTING) && b->batInserted == b->batCount && HEAPsave(hp, nme, ext) == GDK_SUCCEED && (fd = GDKfdlocate(hp->farmid, nme, "rb+", ext)) >= 0) { ALGODEBUG fprintf(stderr, "#BAThash: persisting hash %d\n", b->batCacheid); ((size_t *) hp->base)[0] |= 1 << 24; if (write(fd, hp->base, SIZEOF_SIZE_T) < 0) perror("write hash"); if (!(GDKdebug & FORCEMITOMASK)) { #if defined(NATIVE_WIN32) _commit(fd); #elif defined(HAVE_FDATASYNC) fdatasync(fd); #elif defined(HAVE_FSYNC) fsync(fd); #endif } close(fd); } else ALGODEBUG fprintf(stderr, "#BAThash: NOT persisting hash %d\n", b->batCacheid); #endif b->T->hash = h; t1 = GDKusec(); ALGODEBUG fprintf(stderr, "#BAThash: hash construction " LLFMT " usec\n", t1 - t0); ALGODEBUG HASHcollisions(b, b->T->hash); }