/*
 * Allocate a sinode from the allocator r->a and bring every member
 * into its initial state: counters and flags are zeroed, the branch
 * list is empty, and the embedded file, mmap, index, tree, queue and
 * list members are initialized through their own init helpers.
 *
 * Returns the new node, or NULL after reporting an out-of-memory
 * malfunction through r->e.
 */
sinode *si_nodenew(sr *r)
{
	sinode *node = (sinode*)ss_malloc(r->a, sizeof(*node));
	if (ssunlikely(node == NULL)) {
		sr_oom_malfunction(r->e);
		return NULL;
	}

	/* plain scalar state */
	node->recover     = 0;
	node->backup      = 0;
	node->lru         = 0;
	node->ac          = 0;
	node->flags       = 0;
	node->update_time = 0;
	node->used        = 0;
	node->in_memory   = 0;

	/* embedded branch and the (empty) branch list */
	si_branchinit(&node->self, r);
	node->branch       = NULL;
	node->branch_count = 0;

	/* temperature statistics */
	node->temperature       = 0;
	node->temperature_reads = 0;

	/* file and memory maps */
	ss_fileinit(&node->file, r->vfs);
	ss_mmapinit(&node->map);
	ss_mmapinit(&node->map_swap);

	/* in-memory indexes */
	sv_indexinit(&node->i0);
	sv_indexinit(&node->i1);

	/* tree, queue and list linkage */
	ss_rbinitnode(&node->node);
	ss_rqinitnode(&node->nodecompact);
	ss_rqinitnode(&node->nodebranch);
	ss_rqinitnode(&node->nodetemp);
	ss_listinit(&node->commit);

	return node;
}
/*
 * Release the content of an in-memory index and leave it empty.
 *
 * When the index has a root, the tree is handed to
 * si_nodegc_indexgc(); afterwards the index is re-initialized with
 * sv_indexinit() regardless of whether it had a root.
 *
 * Always returns 0.
 */
int si_nodegc_index(sr *r, svindex *i)
{
	if (i->i.root) {
		/* walk the tree starting from its root */
		si_nodegc_indexgc(i->i.root, r);
	}

	/* reset the index to its initial state */
	sv_indexinit(i);

	return 0;
}
static void sv_indexiter_iterate1(void) { svindex i; t( sv_indexinit(&i) == 0 ); int j = 0; while (j < 16) { svv *v = st_svv(&st_r.g, NULL, j, 0, j); t( sv_indexset(&i, &st_r.r, v) == 0 ); j++; } ssiter it; ss_iterinit(sv_indexiter, &it); ss_iteropen(sv_indexiter, &it, &st_r.r, &i, SS_GTE, NULL, 0); j = 0; while (ss_iteratorhas(&it)) { sv *v = ss_iteratorof(&it); t( sv_lsn(v) == j ); ss_iteratornext(&it); j++; } t( j == 16 ); sv_indexfree(&i, &st_r.r); }
static void sv_indexiter_iterate0(void) { svindex i; t( sv_indexinit(&i) == 0 ); int keyb = 3; int keya = 7; int keyc = 15; svv *h = st_svv(&st_r.g, NULL, 0, 0, keyb); t( sv_indexset(&i, &st_r.r, h) == 0 ); svv *p = st_svv(&st_r.g, NULL, 2, 0, keyc); t( sv_indexset(&i, &st_r.r, p) == 0 ); svv *va = st_svv(&st_r.g, NULL, 1, 0, keya); t( sv_indexset(&i, &st_r.r, va) == 0 ); svv *vb = st_svv(&st_r.g, NULL, 2, 0, keya); t( sv_indexset(&i, &st_r.r, vb) == 0 ); svv *vc = st_svv(&st_r.g, NULL, 3, 0, keya); t( sv_indexset(&i, &st_r.r, vc) == 0 ); ssiter it; ss_iterinit(sv_indexiter, &it); ss_iteropen(sv_indexiter, &it, &st_r.r, &i, SS_GTE, NULL, 0); t( ss_iteratorhas(&it) != 0 ); sv *v = ss_iteratorof(&it); t( v->v == h ); ss_iteratornext(&it); v = ss_iteratorof(&it); t( v->v == vc ); ss_iteratornext(&it); v = ss_iteratorof(&it); t( v->v == vb ); ss_iteratornext(&it); v = ss_iteratorof(&it); t( v->v == va ); ss_iteratornext(&it); v = ss_iteratorof(&it); t( v->v == p ); ss_iteratornext(&it); v = ss_iteratorof(&it); t( v == NULL ); sv_indexfree(&i, &st_r.r); }
/*
 * An SS_LTE iterator opened without a key on an empty index must
 * report no elements and yield NULL.
 */
static void sv_indexiter_lte_empty(void)
{
	svindex idx;
	t( sv_indexinit(&idx) == 0 );

	ssiter it;
	ss_iterinit(sv_indexiter, &it);
	ss_iteropen(sv_indexiter, &it, &st_r.r, &idx, SS_LTE, NULL, 0);

	/* nothing to iterate over */
	t( ss_iteratorhas(&it) == 0 );

	sv *cur = ss_iteratorof(&it);
	t( cur == NULL );

	sv_indexfree(&idx, &st_r.r);
}
/*
 * Allocate a sinode from the allocator r->a and initialize all of
 * its members: flags and counters are zeroed, the branch list is
 * empty, and the file, indexes, tree/queue nodes and commit list are
 * set up through their init helpers.
 *
 * Returns the new node, or NULL after reporting
 * "memory allocation failed" through r->e.
 */
sinode *si_nodenew(sr *r)
{
	sinode *node = (sinode*)sr_malloc(r->a, sizeof(*node));
	if (srunlikely(node == NULL)) {
		sr_malfunction(r->e, "%s", "memory allocation failed");
		return NULL;
	}

	/* plain scalar state */
	node->recover     = 0;
	node->backup      = 0;
	node->flags       = 0;
	node->update_time = 0;
	node->used        = 0;

	/* embedded branch and the (empty) branch list */
	si_branchinit(&node->self);
	node->branch       = NULL;
	node->branch_count = 0;

	/* file and in-memory indexes */
	sr_fileinit(&node->file, r->a);
	sv_indexinit(&node->i0);
	sv_indexinit(&node->i1);

	/* tree, queue and list linkage */
	sr_rbinitnode(&node->node);
	sr_rqinitnode(&node->nodecompact);
	sr_rqinitnode(&node->nodebranch);
	sr_listinit(&node->commit);

	return node;
}
static void sv_indexiter_gte_empty(void) { svindex i; t( sv_indexinit(&i) == 0 ); svv *key = st_svv(&st_r.g, &st_r.gc, 0, 0, 7); ssiter it; ss_iterinit(sv_indexiter, &it); ss_iteropen(sv_indexiter, &it, &st_r.r, &i, SS_GTE, sv_vpointer(key), key->size); t( ss_iteratorhas(&it) == 0 ); sv *v = ss_iteratorof(&it); t( v == NULL ); sv_indexfree(&i, &st_r.r); }
static void sv_indexiter_lt_eq(void) { svindex i; t( sv_indexinit(&i) == 0 ); int keya = 7; int keyb = 5; int keyc = 2; svv *va = st_svv(&st_r.g, NULL, 0, 0, keya); t( sv_indexset(&i, &st_r.r, va) == 0 ); svv *vb = st_svv(&st_r.g, NULL, 0, 0, keyb); t( sv_indexset(&i, &st_r.r, vb) == 0 ); svv *vc = st_svv(&st_r.g, NULL, 0, 0, keyc); t( sv_indexset(&i, &st_r.r, vc) == 0 ); ssiter it; ss_iterinit(sv_indexiter, &it); ss_iteropen(sv_indexiter, &it, &st_r.r, &i, SS_LT, sv_vpointer(va), va->size); t( ss_iteratorhas(&it) != 0 ); sv *v = ss_iteratorof(&it); t( v->v == vb ); ss_iterinit(sv_indexiter, &it); ss_iteropen(sv_indexiter, &it, &st_r.r, &i, SS_LT, sv_vpointer(vb), vb->size); t( ss_iteratorhas(&it) != 0 ); v = ss_iteratorof(&it); t( v->v == vc ); ss_iterinit(sv_indexiter, &it); ss_iteropen(sv_indexiter, &it, &st_r.r, &i, SS_LT, sv_vpointer(vc), vc->size); t( ss_iteratorhas(&it) == 0 ); v = ss_iteratorof(&it); t( v == NULL ); sv_indexfree(&i, &st_r.r); }
/*
 * Two values for the same key (7) are stored one after another.
 * Both must remain reachable through getv(): passing 0 returns the
 * first value and passing 1 returns the second. The first value must
 * have a NULL next pointer.
 */
static void sv_index_replace0(void)
{
	svindex idx;
	t( sv_indexinit(&idx) == 0 );

	uint32_t key = 7;
	svv *first  = st_svv(&st_r.g, NULL, 0, 0, key);
	svv *second = st_svv(&st_r.g, NULL, 1, 0, key);
	t( sv_indexset(&idx, &st_r.r, first) == 0 );
	t( sv_indexset(&idx, &st_r.r, second) == 0 );

	/* lookup key; not inserted into the index */
	svv *probe = st_svv(&st_r.g, &st_r.gc, 0, 0, key);

	svv *found = getv(&idx, &st_r.r, 0, probe);
	t( found == first );

	found = getv(&idx, &st_r.r, 1, probe);
	t( found == second );

	t( first->next == NULL );

	sv_indexfree(&idx, &st_r.r);
}
/*
 * Split a merge stream into a number of new nodes.
 *
 * For every chunk produced by sd_merge() a node is allocated, the
 * chunk is committed under a freshly generated id (parent is the id
 * of `parent`), the node is created from the merge index and build
 * buffer, and a pointer to it is appended to `result`.
 *
 * Returns 0 on success. On failure returns -1 after calling
 * si_splitfree() on `result` and sd_mergefree() on the merge state.
 */
static inline int
si_split(si *index, sdc *c, ssbuf *result,
         sinode *parent,
         ssiter *i,
         uint64_t size_node,
         uint32_t size_stream,
         uint64_t vlsn)
{
	sr *r = index->r;
	int rc;
	sdmergeconf mergeconf = {
		.size_stream     = size_stream,
		.size_node       = size_node,
		.size_page       = index->scheme->node_page_size,
		.checksum        = index->scheme->node_page_checksum,
		.compression     = index->scheme->compression,
		.compression_key = index->scheme->compression_key,
		.offset          = 0,
		.vlsn            = vlsn,
		.save_delete     = 0,
		.save_update     = 0
	};
	sdmerge merge;
	sd_mergeinit(&merge, r, i, &c->build, &c->update, &mergeconf);
	while ((rc = sd_merge(&merge)) > 0)
	{
		sinode *n = si_nodenew(r);
		if (ssunlikely(n == NULL))
			goto error;
		sdid id = {
			.parent = parent->self.id.id,
			.flags  = 0,
			.id     = sr_seq(index->r->seq, SR_NSNNEXT)
		};
		rc = sd_mergecommit(&merge, &id);
		if (ssunlikely(rc == -1)) {
			/* The node has only been through si_nodenew() and is
			 * not stored in result, so the error path below would
			 * not release it. Free it here. */
			si_nodefree(n, r, 0);
			goto error;
		}
		rc = si_nodecreate(n, r, index->scheme, &id, &merge.index, &c->build);
		if (ssunlikely(rc == -1)) {
			/* NOTE(review): n is not in result and is not released
			 * on this path either. Whether it can be freed safely
			 * depends on what si_nodecreate() attached to the node
			 * before failing (it receives &merge.index, which
			 * sd_mergefree() is also given below) -- confirm
			 * ownership before adding a free here. */
			goto error;
		}
		rc = ss_bufadd(result, index->r->a, &n, sizeof(sinode*));
		if (ssunlikely(rc == -1)) {
			sr_oom_malfunction(index->r->e);
			si_nodefree(n, r, 1);
			goto error;
		}
		sd_buildreset(&c->build);
	}
	if (ssunlikely(rc == -1))
		goto error;
	return 0;
error:
	si_splitfree(result, r);
	sd_mergefree(&merge);
	return -1;
}

/*
 * Compact `node`: rewrite its merge `stream` into zero or more new
 * nodes and substitute them for `node` in the index.
 *
 * Steps, in order:
 *   1. si_split() turns the stream into new nodes collected in c->a.
 *   2. If no node was produced and the index holds a single node, a
 *      bootstrap node is created so the removal looks like an update.
 *   3. Under the index lock the old node is removed (0 new nodes),
 *      replaced (1) or replaced and extended (2 or more); every new
 *      node is locked with si_nodelock().
 *   4. Outside the lock the new nodes are optionally synced and
 *      sealed, the old node is freed, and the new nodes are completed.
 *   5. Under the index lock the new nodes are unlocked.
 *
 * Returns 0 on success, -1 on error.
 */
int si_compaction(si *index, sdc *c, uint64_t vlsn,
                  sinode *node,
                  ssiter *stream, uint32_t size_stream)
{
	sr *r = index->r;
	ssbuf *result = &c->a;
	ssiter i;

	/* begin compaction.
	 *
	 * split merge stream into a number
	 * of a new nodes.
	 */
	int rc;
	rc = si_split(index, c, result,
	              node, stream,
	              index->scheme->node_size,
	              size_stream,
	              vlsn);
	if (ssunlikely(rc == -1))
		return -1;

	SS_INJECTION(r->i, SS_INJECTION_SI_COMPACTION_0,
	             si_splitfree(result, r);
	             sr_malfunction(r->e, "%s", "error injection");
	             return -1);

	/* mask removal of a single node as a
	 * single node update */
	int count = ss_bufused(result) / sizeof(sinode*);
	int count_index;

	si_lock(index);
	count_index = index->n;
	si_unlock(index);

	sinode *n;
	if (ssunlikely(count == 0 && count_index == 1))
	{
		n = si_bootstrap(index, node->self.id.id);
		if (ssunlikely(n == NULL))
			return -1;
		rc = ss_bufadd(result, r->a, &n, sizeof(sinode*));
		if (ssunlikely(rc == -1)) {
			sr_oom_malfunction(r->e);
			si_nodefree(n, r, 1);
			return -1;
		}
		count++;
	}

	/* commit compaction changes */
	si_lock(index);
	svindex *j = si_nodeindex(node);
	si_plannerremove(&index->p, SI_COMPACT|SI_BRANCH, node);
	switch (count) {
	case 0: /* delete */
		si_remove(index, node);
		si_redistribute_index(index, r, c, node);
		uint32_t used = sv_indexused(j);
		if (used) {
			ss_quota(r->quota, SS_QREMOVE, used);
		}
		break;
	case 1: /* self update */
		n = *(sinode**)result->s;
		/* the single new node takes over the old in-memory index */
		n->i0 = *j;
		n->used = sv_indexused(j);
		si_nodelock(n);
		si_replace(index, node, n);
		si_plannerupdate(&index->p, SI_COMPACT|SI_BRANCH, n);
		break;
	default: /* split */
		rc = si_redistribute(index, r, c, node, result);
		if (ssunlikely(rc == -1)) {
			si_unlock(index);
			si_splitfree(result, r);
			return -1;
		}
		ss_iterinit(ss_bufiterref, &i);
		ss_iteropen(ss_bufiterref, &i, result, sizeof(sinode*));
		/* the first new node replaces the old one ... */
		n = ss_iterof(ss_bufiterref, &i);
		n->used = sv_indexused(&n->i0);
		si_nodelock(n);
		si_replace(index, node, n);
		si_plannerupdate(&index->p, SI_COMPACT|SI_BRANCH, n);
		/* ... and the remaining ones are inserted */
		for (ss_iternext(ss_bufiterref, &i); ss_iterhas(ss_bufiterref, &i);
		     ss_iternext(ss_bufiterref, &i)) {
			n = ss_iterof(ss_bufiterref, &i);
			n->used = sv_indexused(&n->i0);
			si_nodelock(n);
			si_insert(index, n);
			si_plannerupdate(&index->p, SI_COMPACT|SI_BRANCH, n);
		}
		break;
	}
	/* reset the old node's index in every case */
	sv_indexinit(j);
	si_unlock(index);

	/* compaction completion */

	/* seal nodes */
	ss_iterinit(ss_bufiterref, &i);
	ss_iteropen(ss_bufiterref, &i, result, sizeof(sinode*));
	while (ss_iterhas(ss_bufiterref, &i))
	{
		n = ss_iterof(ss_bufiterref, &i);
		if (index->scheme->sync) {
			rc = si_nodesync(n, r);
			if (ssunlikely(rc == -1))
				return -1;
		}
		rc = si_nodeseal(n, r, index->scheme);
		if (ssunlikely(rc == -1))
			return -1;
		SS_INJECTION(r->i, SS_INJECTION_SI_COMPACTION_3,
		             si_nodefree(node, r, 0);
		             sr_malfunction(r->e, "%s", "error injection");
		             return -1);
		ss_iternext(ss_bufiterref, &i);
	}

	SS_INJECTION(r->i, SS_INJECTION_SI_COMPACTION_1,
	             si_nodefree(node, r, 0);
	             sr_malfunction(r->e, "%s", "error injection");
	             return -1);

	/* gc old node */
	rc = si_nodefree(node, r, 1);
	if (ssunlikely(rc == -1))
		return -1;

	SS_INJECTION(r->i, SS_INJECTION_SI_COMPACTION_2,
	             sr_malfunction(r->e, "%s", "error injection");
	             return -1);

	/* complete new nodes */
	ss_iterinit(ss_bufiterref, &i);
	ss_iteropen(ss_bufiterref, &i, result, sizeof(sinode*));
	while (ss_iterhas(ss_bufiterref, &i))
	{
		n = ss_iterof(ss_bufiterref, &i);
		rc = si_nodecomplete(n, r, index->scheme);
		if (ssunlikely(rc == -1))
			return -1;
		SS_INJECTION(r->i, SS_INJECTION_SI_COMPACTION_4,
		             sr_malfunction(r->e, "%s", "error injection");
		             return -1);
		ss_iternext(ss_bufiterref, &i);
	}

	/* unlock */
	si_lock(index);
	ss_iterinit(ss_bufiterref, &i);
	ss_iteropen(ss_bufiterref, &i, result, sizeof(sinode*));
	while (ss_iterhas(ss_bufiterref, &i))
	{
		n = ss_iterof(ss_bufiterref, &i);
		si_nodeunlock(n);
		ss_iternext(ss_bufiterref, &i);
	}
	si_unlock(index);
	return 0;
}
static inline int si_split(si *index, sdc *c, ssbuf *result, sinode *parent, ssiter *i, uint64_t size_node, uint64_t size_stream, uint32_t stream, uint64_t vlsn) { sr *r = &index->r; uint32_t timestamp = ss_timestamp(); int rc; sdmergeconf mergeconf = { .stream = stream, .size_stream = size_stream, .size_node = size_node, .size_page = index->scheme.compaction.node_page_size, .checksum = index->scheme.compaction.node_page_checksum, .expire = index->scheme.expire, .timestamp = timestamp, .compression = index->scheme.compression, .compression_if = index->scheme.compression_if, .direct_io = index->scheme.direct_io, .direct_io_page_size = index->scheme.direct_io_page_size, .vlsn = vlsn }; sinode *n = NULL; sdmerge merge; rc = sd_mergeinit(&merge, r, i, &c->build, &c->build_index, &c->upsert, &mergeconf); if (ssunlikely(rc == -1)) return -1; while ((rc = sd_merge(&merge)) > 0) { /* create new node */ uint64_t id = sr_seq(index->r.seq, SR_NSNNEXT); n = si_nodenew(r, id, parent->id); if (ssunlikely(n == NULL)) goto error; rc = si_nodecreate(n, r, &index->scheme); if (ssunlikely(rc == -1)) goto error; /* write pages */ uint64_t offset; offset = sd_iosize(&c->io, &n->file); while ((rc = sd_mergepage(&merge, offset)) == 1) { rc = sd_writepage(r, &n->file, &c->io, merge.build); if (ssunlikely(rc == -1)) goto error; offset = sd_iosize(&c->io, &n->file); } if (ssunlikely(rc == -1)) goto error; offset = sd_iosize(&c->io, &n->file); rc = sd_mergeend(&merge, offset); if (ssunlikely(rc == -1)) goto error; /* write index */ rc = sd_writeindex(r, &n->file, &c->io, &merge.index); if (ssunlikely(rc == -1)) goto error; /* mmap mode */ if (index->scheme.mmap) { rc = si_nodemap(n, r); if (ssunlikely(rc == -1)) goto error; } /* add node to the list */ rc = ss_bufadd(result, index->r.a, &n, sizeof(sinode*)); if (ssunlikely(rc == -1)) { sr_oom_malfunction(index->r.e); goto error; } n->index = merge.index; } if (ssunlikely(rc == -1)) goto error; return 0; error: if (n) si_nodefree(n, r, 0); 
sd_mergefree(&merge); si_splitfree(result, r); return -1; } static int si_merge(si *index, sdc *c, sinode *node, uint64_t vlsn, ssiter *stream, uint64_t size_stream, uint32_t n_stream) { sr *r = &index->r; ssbuf *result = &c->a; ssiter i; /* begin compaction. * * Split merge stream into a number of * a new nodes. */ int rc; rc = si_split(index, c, result, node, stream, index->scheme.compaction.node_size, size_stream, n_stream, vlsn); if (ssunlikely(rc == -1)) return -1; SS_INJECTION(r->i, SS_INJECTION_SI_COMPACTION_0, si_splitfree(result, r); sr_malfunction(r->e, "%s", "error injection"); return -1); /* mask removal of a single node as a * single node update */ int count = ss_bufused(result) / sizeof(sinode*); int count_index; si_lock(index); count_index = index->n; si_unlock(index); sinode *n; if (ssunlikely(count == 0 && count_index == 1)) { n = si_bootstrap(index, node->id); if (ssunlikely(n == NULL)) return -1; rc = ss_bufadd(result, r->a, &n, sizeof(sinode*)); if (ssunlikely(rc == -1)) { sr_oom_malfunction(r->e); si_nodefree(n, r, 1); return -1; } count++; } /* commit compaction changes */ si_lock(index); svindex *j = si_nodeindex(node); si_plannerremove(&index->p, node); si_nodesplit(node); switch (count) { case 0: /* delete */ si_remove(index, node); si_redistribute_index(index, r, c, node); break; case 1: /* self update */ n = *(sinode**)result->s; n->i0 = *j; n->used = j->used; si_nodelock(n); si_replace(index, node, n); si_plannerupdate(&index->p, n); break; default: /* split */ rc = si_redistribute(index, r, c, node, result); if (ssunlikely(rc == -1)) { si_unlock(index); si_splitfree(result, r); return -1; } ss_iterinit(ss_bufiterref, &i); ss_iteropen(ss_bufiterref, &i, result, sizeof(sinode*)); n = ss_iterof(ss_bufiterref, &i); n->used = n->i0.used; si_nodelock(n); si_replace(index, node, n); si_plannerupdate(&index->p, n); for (ss_iternext(ss_bufiterref, &i); ss_iterhas(ss_bufiterref, &i); ss_iternext(ss_bufiterref, &i)) { n = ss_iterof(ss_bufiterref, 
&i); n->used = n->i0.used; si_nodelock(n); si_insert(index, n); si_plannerupdate(&index->p, n); } break; } sv_indexinit(j); si_unlock(index); /* compaction completion */ /* seal nodes */ ss_iterinit(ss_bufiterref, &i); ss_iteropen(ss_bufiterref, &i, result, sizeof(sinode*)); while (ss_iterhas(ss_bufiterref, &i)) { n = ss_iterof(ss_bufiterref, &i); if (index->scheme.sync) { rc = ss_filesync(&n->file); if (ssunlikely(rc == -1)) { sr_malfunction(r->e, "db file '%s' sync error: %s", ss_pathof(&n->file.path), strerror(errno)); return -1; } } rc = si_noderename_seal(n, r, &index->scheme); if (ssunlikely(rc == -1)) { si_nodefree(node, r, 0); return -1; } SS_INJECTION(r->i, SS_INJECTION_SI_COMPACTION_3, si_nodefree(node, r, 0); sr_malfunction(r->e, "%s", "error injection"); return -1); ss_iternext(ss_bufiterref, &i); } SS_INJECTION(r->i, SS_INJECTION_SI_COMPACTION_1, si_nodefree(node, r, 0); sr_malfunction(r->e, "%s", "error injection"); return -1); /* gc node */ uint16_t refs = si_noderefof(node); if (sslikely(refs == 0)) { rc = si_nodefree(node, r, 1); if (ssunlikely(rc == -1)) return -1; } else { /* node concurrently being read, schedule for * delayed removal */ si_nodegc(node, r, &index->scheme); si_lock(index); ss_listappend(&index->gc, &node->gc); index->gc_count++; si_unlock(index); } SS_INJECTION(r->i, SS_INJECTION_SI_COMPACTION_2, sr_malfunction(r->e, "%s", "error injection"); return -1); /* complete new nodes */ ss_iterinit(ss_bufiterref, &i); ss_iteropen(ss_bufiterref, &i, result, sizeof(sinode*)); while (ss_iterhas(ss_bufiterref, &i)) { n = ss_iterof(ss_bufiterref, &i); rc = si_noderename_complete(n, r, &index->scheme); if (ssunlikely(rc == -1)) return -1; SS_INJECTION(r->i, SS_INJECTION_SI_COMPACTION_4, sr_malfunction(r->e, "%s", "error injection"); return -1); ss_iternext(ss_bufiterref, &i); } /* unlock */ si_lock(index); ss_iterinit(ss_bufiterref, &i); ss_iteropen(ss_bufiterref, &i, result, sizeof(sinode*)); while (ss_iterhas(ss_bufiterref, &i)) { n = 
ss_iterof(ss_bufiterref, &i); si_nodeunlock(n); ss_iternext(ss_bufiterref, &i); } si_unlock(index); return 0; }
svindexiter_lte_empty(stc *cx ssunused) { ssa a; ss_aopen(&a, &ss_stda); srscheme cmp; sr_schemeinit(&cmp); srkey *part = sr_schemeadd(&cmp, &a); t( sr_keysetname(part, &a, "key") == 0 ); t( sr_keyset(part, &a, "u32") == 0 ); srerror error; sr_errorinit(&error); sr r; sr_init(&r, &error, &a, NULL, SF_KV, SF_SRAW, &cmp, NULL, NULL, NULL); svindex i; t( sv_indexinit(&i) == 0 ); ssiter it; ss_iterinit(sv_indexiter, &it); ss_iteropen(sv_indexiter, &it, &r, &i, SS_LTE, NULL, 0, 0ULL); t( ss_iteratorhas(&it) == 0 ); sv *v = ss_iteratorof(&it); t( v == NULL ); sv_indexfree(&i, &r); ss_aclose(&a); sr_schemefree(&cmp, &a); } static void svindexiter_lte_eq0(stc *cx ssunused) {