/*
 * Commit prelude: commit the delta status of the in-memory image of
 * every persistent BAT that takes part in the commit.
 *
 * cnt       - exclusive upper bound of the visited index range; index 0
 *             is never visited.
 * subcommit - when non-NULL, subcommit[i] supplies the bat id for index
 *             i; when NULL the index itself is used as the bat id.
 *
 * Returns GDK_FAIL as soon as BBPquickdesc cannot deliver a descriptor
 * for a persistent BAT that is flagged BBPSWAPPED and has no cached
 * image; returns GDK_SUCCEED otherwise.
 */
static gdk_return
prelude(int cnt, bat *subcommit)
{
	int idx;

	for (idx = 1; idx < cnt; idx++) {
		bat bid = subcommit ? subcommit[idx] : idx;
		BAT *b;

		/* only persistent bats are committed */
		if (!(BBP_status(bid) & BBPPERSISTENT))
			continue;

		b = BBP_cache(bid);
		if (b == NULL && (BBP_status(bid) & BBPSWAPPED)) {
			/* no cached image: fall back to the quick descriptor */
			b = BBPquickdesc(bid, TRUE);
			if (b == NULL)
				return GDK_FAIL;
		}
		if (b == NULL)
			continue;

		assert(!isVIEW(b));
		assert(b->batRole == PERSISTENT);
		BATcommit(b);
	}
	return GDK_SUCCEED;
}
/*
 * Produce a string BAT with one entry per BBP slot that has a logical
 * name and a non-zero BBP_refs or BBP_lrefs count (the result BAT itself
 * is excluded).  Each entry reads "transient" when the slot's status has
 * BBPDELETED set or lacks BBPPERSISTENT, and "persistent" otherwise.
 *
 * The result is handed to pseudo() under the names "bbp"/"kind".
 * Raises a MAL exception with MAL_MALLOC_FAIL when the result BAT cannot
 * be created; returns MAL_SUCCEED otherwise.
 */
str
CMDbbpKind(bat *ret)
{
	BAT *kinds;
	int bid;

	kinds = BATnew(TYPE_void, TYPE_str, getBBPsize(), TRANSIENT);
	if (kinds == 0)
		throw(MAL, "catalog.bbpKind", MAL_MALLOC_FAIL);
	BATseqbase(kinds, 0);

	BBPlock("CMDbbpKind");
	for (bid = 1; bid < getBBPsize(); bid++) {
		char *label;

		/* do not report the BAT we are filling */
		if (bid == kinds->batCacheid)
			continue;
		/* skip slots without a name or without any reference */
		if (!(BBP_logical(bid) && (BBP_refs(bid) || BBP_lrefs(bid))))
			continue;

		if (!(BBP_status(bid) & BBPDELETED) && (BBP_status(bid) & BBPPERSISTENT))
			label = "persistent";
		else
			label = "transient";
		BUNappend(kinds, label, FALSE);
	}
	BBPunlock("CMDbbpKind");

	/* switch to read access unless the 2-bit of batDirty is set */
	if (!(kinds->batDirty & 2))
		BATsetaccess(kinds, BAT_READ);
	pseudo(ret, kinds, "bbp", "kind");
	return MAL_SUCCEED;
}
/*
 * Commit epilogue: update the BBP status of the bats so that it reflects
 * their presence in the checkpoint that just succeeded.  Bats from the
 * previous checkpoint that were deleted are physically destroyed here.
 *
 * cnt       - exclusive upper bound of the visited index range; index 0
 *             is never visited.
 * subcommit - when non-NULL, subcommit[i] supplies the bat id for index
 *             i; when NULL the index itself is used as the bat id.
 */
static void
epilogue(int cnt, bat *subcommit)
{
	int idx;

	for (idx = 1; idx < cnt; idx++) {
		bat bid = subcommit ? subcommit[idx] : idx;

		if (BBP_status(bid) & BBPPERSISTENT) {
			BBP_status_on(bid, BBPEXISTING, subcommit ? "TMsubcommit" : "TMcommit");
		} else if (BBP_status(bid) & BBPDELETED) {
			/* Check the mmap modes of bats that are now
			 * transient.  This must wait until the commit has
			 * succeeded: the mmap modes allowed on transient
			 * bats would be dangerous on persistent bats, and
			 * had the commit failed, bats already processed
			 * (which would then not have become transient)
			 * would be a consistency risk. */
			BAT *cached = BBP_cache(bid);

			if (cached != NULL &&
			    BATcheckmodes(cached, true) != GDK_SUCCEED)
				fprintf(stderr, "#epilogue: BATcheckmodes failed\n");
		}

		if ((BBP_status(bid) & BBPDELETED) &&
		    BBP_refs(bid) <= 0 &&
		    BBP_lrefs(bid) <= 0) {
			/* unloaded bats are deleted without loading
			 * their deleted disk images */
			BAT *victim = BBPquickdesc(bid, TRUE);

			if (victim != NULL) {
				BATdelete(victim);
				/* if quickdesc decided to load it, free
				 * the memory again */
				if (BBP_cache(bid))
					BATfree(victim);
			}
			BBPclear(bid);	/* clear with locking */
		}

		BBP_status_off(bid, BBPDELETED | BBPSWAPPED | BBPNEW, subcommit ? "TMsubcommit" : "TMcommit");
	}
	GDKclrerr();
}
/*
 * The BAT dirty status:
 *   dirty => (mem != disk); diffs = not-committed
 *
 * Produce a string BAT with one entry per BBP slot that has a logical
 * name and a non-zero BBP_refs or BBP_lrefs count (the result BAT itself
 * is excluded).  For a slot with a cached BAT the entry is "dirty" when
 * BATdirty holds, else "diffs" when DELTAdirty holds, else "clean".  For
 * a slot without a cached BAT it is "diffs" when the status has
 * BBPSWAPPED set and "clean" otherwise.
 *
 * The result is handed to pseudo() under the names "bbp"/"status".
 * Raises a MAL exception with MAL_MALLOC_FAIL when the result BAT cannot
 * be created; returns MAL_SUCCEED otherwise.
 */
str
CMDbbpDirty(bat *ret)
{
	BAT *states;
	int bid;

	states = BATnew(TYPE_void, TYPE_str, getBBPsize(), TRANSIENT);
	if (states == 0)
		throw(MAL, "catalog.bbpDirty", MAL_MALLOC_FAIL);
	BATseqbase(states, 0);

	BBPlock("CMDbbpDirty");
	for (bid = 1; bid < getBBPsize(); bid++) {
		BAT *cached;
		char *state;

		/* do not report the BAT we are filling */
		if (bid == states->batCacheid)
			continue;
		/* skip slots without a name or without any reference */
		if (!(BBP_logical(bid) && (BBP_refs(bid) || BBP_lrefs(bid))))
			continue;

		cached = BBP_cache(bid);
		if (cached == NULL) {
			if (BBP_status(bid) & BBPSWAPPED)
				state = "diffs";
			else
				state = "clean";
		} else if (BATdirty(cached)) {
			state = "dirty";
		} else if (DELTAdirty(cached)) {
			state = "diffs";
		} else {
			state = "clean";
		}
		BUNappend(states, state, FALSE);
	}
	BBPunlock("CMDbbpDirty");

	/* switch to read access unless the 2-bit of batDirty is set */
	if (!(states->batDirty & 2))
		BATsetaccess(states, BAT_READ);
	pseudo(ret, states, "bbp", "status");
	return MAL_SUCCEED;
}
/* * The prime routine for the BAT layer is to create a new hash index. * Its argument is the element type and the maximum number of BUNs be * stored under the hash function. */ gdk_return BAThash(BAT *b, BUN masksize) { BAT *o = NULL; lng t0 = 0, t1 = 0; if (BATcheckhash(b)) { if (o != NULL) { o->T->hash = b->T->hash; BBPunfix(b->batCacheid); } return GDK_SUCCEED; } MT_lock_set(&GDKhashLock(abs(b->batCacheid)), "BAThash"); if (b->T->hash == NULL) { unsigned int tpe = ATOMbasetype(b->ttype); BUN cnt = BATcount(b); BUN mask, maxmask = 0; BUN p = BUNfirst(b), q = BUNlast(b), r; Hash *h = NULL; Heap *hp; const char *nme = BBP_physical(b->batCacheid); const char *ext = b->batCacheid > 0 ? "thash" : "hhash"; BATiter bi = bat_iterator(b); #ifdef PERSISTENTHASH int fd; #endif ALGODEBUG fprintf(stderr, "#BAThash: create hash(" BUNFMT ");\n", BATcount(b)); if ((hp = GDKzalloc(sizeof(*hp))) == NULL || (hp->farmid = BBPselectfarm(b->batRole, b->ttype, hashheap)) < 0 || (hp->filename = GDKmalloc(strlen(nme) + 12)) == NULL) { MT_lock_unset(&GDKhashLock(abs(b->batCacheid)), "BAThash"); GDKfree(hp); return GDK_FAIL; } sprintf(hp->filename, "%s.%s", nme, ext); /* cnt = 0, hopefully there is a proper capacity from * which we can derive enough information */ if (!cnt) cnt = BATcapacity(b); if (b->ttype == TYPE_void) { if (b->tseqbase == oid_nil) { MT_lock_unset(&GDKhashLock(abs(b->batCacheid)), "BAThash"); ALGODEBUG fprintf(stderr, "#BAThash: cannot create hash-table on void-NIL column.\n"); GDKfree(hp->filename); GDKfree(hp); return GDK_FAIL; } ALGODEBUG fprintf(stderr, "#BAThash: creating hash-table on void column..\n"); tpe = TYPE_void; } /* determine hash mask size p = first; then no dynamic * scheme */ if (masksize > 0) { mask = HASHmask(masksize); } else if (ATOMsize(tpe) == 1) { mask = (1 << 8); } else if (ATOMsize(tpe) == 2) { mask = (1 << 16); } else if (b->tkey) { mask = HASHmask(cnt); } else { /* dynamic hash: we start with * HASHmask(cnt)/64; if there are too many * 
collisions we try HASHmask(cnt)/16, then * HASHmask(cnt)/4, and finally * HASHmask(cnt). */ maxmask = HASHmask(cnt); mask = maxmask >> 6; p += (cnt >> 2); /* try out on first 25% of b */ if (p > q) p = q; } t0 = GDKusec(); do { BUN nslots = mask >> 3; /* 1/8 full is too full */ r = BUNfirst(b); if (h) { char *fnme; bte farmid; ALGODEBUG fprintf(stderr, "#BAThash: retry hash construction\n"); fnme = GDKstrdup(hp->filename); farmid = hp->farmid; HEAPfree(hp, 1); memset(hp, 0, sizeof(*hp)); hp->filename = fnme; hp->farmid = farmid; GDKfree(h); h = NULL; } /* create the hash structures */ if ((h = HASHnew(hp, ATOMtype(b->ttype), BATcapacity(b), mask, BATcount(b))) == NULL) { MT_lock_unset(&GDKhashLock(abs(b->batCacheid)), "BAThash"); GDKfree(hp->filename); GDKfree(hp); return GDK_FAIL; } switch (tpe) { case TYPE_bte: starthash(bte); break; case TYPE_sht: starthash(sht); break; case TYPE_int: case TYPE_flt: #if SIZEOF_OID == SIZEOF_INT case TYPE_oid: #endif #if SIZEOF_WRD == SIZEOF_INT case TYPE_wrd: #endif starthash(int); break; case TYPE_dbl: case TYPE_lng: #if SIZEOF_OID == SIZEOF_LNG case TYPE_oid: #endif #if SIZEOF_WRD == SIZEOF_LNG case TYPE_wrd: #endif starthash(lng); break; #ifdef HAVE_HGE case TYPE_hge: starthash(hge); break; #endif default: for (; r < p; r++) { ptr v = BUNtail(bi, r); BUN c = (BUN) heap_hash_any(b->T->vheap, h, v); if (HASHget(h, c) == HASHnil(h) && nslots-- == 0) break; /* mask too full */ HASHputlink(h, r, HASHget(h, c)); HASHput(h, c, r); } break; } } while (r < p && mask < maxmask && (mask <<= 2)); /* finish the hashtable with the current mask */ p = r; switch (tpe) { case TYPE_bte: finishhash(bte); break; case TYPE_sht: finishhash(sht); break; case TYPE_int: case TYPE_flt: #if SIZEOF_OID == SIZEOF_INT case TYPE_oid: #endif #if SIZEOF_WRD == SIZEOF_INT case TYPE_wrd: #endif finishhash(int); break; case TYPE_dbl: case TYPE_lng: #if SIZEOF_OID == SIZEOF_LNG case TYPE_oid: #endif #if SIZEOF_WRD == SIZEOF_LNG case TYPE_wrd: #endif 
finishhash(lng); break; #ifdef HAVE_HGE case TYPE_hge: finishhash(hge); break; #endif default: for (; p < q; p++) { ptr v = BUNtail(bi, p); BUN c = (BUN) heap_hash_any(b->T->vheap, h, v); HASHputlink(h, p, HASHget(h, c)); HASHput(h, c, p); } break; } #ifdef PERSISTENTHASH if ((BBP_status(b->batCacheid) & BBPEXISTING) && b->batInserted == b->batCount && HEAPsave(hp, nme, ext) == GDK_SUCCEED && (fd = GDKfdlocate(hp->farmid, nme, "rb+", ext)) >= 0) { ALGODEBUG fprintf(stderr, "#BAThash: persisting hash %d\n", b->batCacheid); ((size_t *) hp->base)[0] |= 1 << 24; if (write(fd, hp->base, SIZEOF_SIZE_T) < 0) perror("write hash"); if (!(GDKdebug & FORCEMITOMASK)) { #if defined(NATIVE_WIN32) _commit(fd); #elif defined(HAVE_FDATASYNC) fdatasync(fd); #elif defined(HAVE_FSYNC) fsync(fd); #endif } close(fd); } else ALGODEBUG fprintf(stderr, "#BAThash: NOT persisting hash %d\n", b->batCacheid); #endif b->T->hash = h; t1 = GDKusec(); ALGODEBUG fprintf(stderr, "#BAThash: hash construction " LLFMT " usec\n", t1 - t0); ALGODEBUG HASHcollisions(b, b->T->hash); }
/*
 * Produce twelve aligned result BATs describing the BBP, one row per slot
 * that has a logical name, a non-zero BBP_refs or BBP_lrefs count, and
 * for which BATdescriptor() yields a BAT:
 *
 *   ID       - the bat id
 *   NS       - BBP_logical name
 *   HT, TT   - atom names of the head and tail types
 *   CNT      - BATcount
 *   REFCNT   - BBP_refs, read after BATdescriptor() was called
 *   LREFCNT  - BBP_lrefs, read after BATdescriptor() was called
 *   LOCATION - BBP_physical name
 *   HEAT     - BBP_lastused
 *   DIRTY    - "dirty" if BATdirty, else "diffs" if DELTAdirty, else "clean"
 *   STATUS   - "load" when BBP_cache is set for the slot, else "disk"
 *   KIND     - "transient" when the status has BBPDELETED set or lacks
 *              BBPPERSISTENT, else "persistent"
 *
 * Each output parameter receives the id of its BAT, passed through
 * BBPkeepref.  Raises a MAL exception with MAL_MALLOC_FAIL (after
 * reclaiming whatever was created) when any result BAT cannot be
 * created; returns MAL_SUCCEED otherwise.
 */
str
CMDbbp(bat *ID, bat *NS, bat *HT, bat *TT, bat *CNT, bat *REFCNT, bat *LREFCNT, bat *LOCATION, bat *HEAT, bat *DIRTY, bat *STATUS, bat *KIND)
{
	BAT *id, *ns, *ht, *tt, *cnt, *refcnt, *lrefcnt, *location, *heat, *dirty, *status, *kind, *bn;
	int i;
	char buf[MAXPATHLEN];

	id = BATnew(TYPE_void, TYPE_int, getBBPsize(), TRANSIENT);
	ns = BATnew(TYPE_void, TYPE_str, getBBPsize(), TRANSIENT);
	ht = BATnew(TYPE_void, TYPE_str, getBBPsize(), TRANSIENT);
	tt = BATnew(TYPE_void, TYPE_str, getBBPsize(), TRANSIENT);
	cnt = BATnew(TYPE_void, TYPE_lng, getBBPsize(), TRANSIENT);
	refcnt = BATnew(TYPE_void, TYPE_int, getBBPsize(), TRANSIENT);
	lrefcnt = BATnew(TYPE_void, TYPE_int, getBBPsize(), TRANSIENT);
	location = BATnew(TYPE_void, TYPE_str, getBBPsize(), TRANSIENT);
	heat = BATnew(TYPE_void, TYPE_int, getBBPsize(), TRANSIENT);
	dirty = BATnew(TYPE_void, TYPE_str, getBBPsize(), TRANSIENT);
	status = BATnew(TYPE_void, TYPE_str, getBBPsize(), TRANSIENT);
	kind = BATnew(TYPE_void, TYPE_str, getBBPsize(), TRANSIENT);

	if (!id || !ns || !ht || !tt || !cnt || !refcnt || !lrefcnt || !location || !heat || !dirty || !status || !kind) {
		/* release whichever BATs did get created */
		BBPreclaim(id);
		BBPreclaim(ns);
		BBPreclaim(ht);
		BBPreclaim(tt);
		BBPreclaim(cnt);
		BBPreclaim(refcnt);
		BBPreclaim(lrefcnt);
		BBPreclaim(location);
		BBPreclaim(heat);
		BBPreclaim(dirty);
		BBPreclaim(status);
		BBPreclaim(kind);
		throw(MAL, "catalog.bbp", MAL_MALLOC_FAIL);
	}

	BATseqbase(id, 0);
	BATseqbase(ns, 0);
	BATseqbase(ht, 0);
	BATseqbase(tt, 0);
	BATseqbase(cnt, 0);
	BATseqbase(refcnt, 0);
	BATseqbase(lrefcnt, 0);
	BATseqbase(location, 0);
	BATseqbase(heat, 0);
	BATseqbase(dirty, 0);
	BATseqbase(status, 0);
	BATseqbase(kind, 0);

	for (i = 1; i < getBBPsize(); i++) {
		lng l;
		int heat_, refs, lrefs;
		char *loc, *mode, *state;

		/* skip slots without a name or without any reference */
		if (!(BBP_logical(i) && (BBP_refs(i) || BBP_lrefs(i))))
			continue;
		bn = BATdescriptor(i);
		if (bn == NULL)
			continue;

		l = BATcount(bn);
		heat_ = BBP_lastused(i);
		loc = BBP_cache(i) ? "load" : "disk";
		mode = "persistent";
		refs = BBP_refs(i);
		lrefs = BBP_lrefs(i);
		if ((BBP_status(i) & BBPDELETED) || !(BBP_status(i) & BBPPERSISTENT))
			mode = "transient";
		snprintf(buf, MAXPATHLEN, "%s", BBP_physical(i));

		BUNappend(id, &i, FALSE);
		BUNappend(ns, BBP_logical(i), FALSE);
		BUNappend(ht, BATatoms[BAThtype(bn)].name, FALSE);
		BUNappend(tt, BATatoms[BATttype(bn)].name, FALSE);
		BUNappend(cnt, &l, FALSE);
		BUNappend(refcnt, &refs, FALSE);
		BUNappend(lrefcnt, &lrefs, FALSE);
		BUNappend(location, buf, FALSE);
		BUNappend(heat, &heat_, FALSE);

		/* bn is known to be non-NULL here, so only its own dirty
		 * state is inspected.  The check is done at this point
		 * (after the appends above) on purpose: bn may be one of
		 * the result BATs that is being filled. */
		if (BATdirty(bn))
			state = "dirty";
		else if (DELTAdirty(bn))
			state = "diffs";
		else
			state = "clean";
		BUNappend(dirty, state, FALSE);

		BUNappend(status, loc, FALSE);
		BUNappend(kind, mode, FALSE);
		BBPunfix(bn->batCacheid);
	}

	BBPkeepref(*ID = id->batCacheid);
	BBPkeepref(*NS = ns->batCacheid);
	BBPkeepref(*HT = ht->batCacheid);
	BBPkeepref(*TT = tt->batCacheid);
	BBPkeepref(*CNT = cnt->batCacheid);
	BBPkeepref(*REFCNT = refcnt->batCacheid);
	BBPkeepref(*LREFCNT = lrefcnt->batCacheid);
	BBPkeepref(*LOCATION = location->batCacheid);
	BBPkeepref(*HEAT = heat->batCacheid);
	BBPkeepref(*DIRTY = dirty->batCacheid);
	BBPkeepref(*STATUS = status->batCacheid);
	BBPkeepref(*KIND = kind->batCacheid);
	return MAL_SUCCEED;
}