/*
 * Validate the recover flags of every node stored in the track.
 *
 * The track tree is walked starting at ss_rbmax() and stepping with
 * ss_rbprev() until no node is left. For each node the exact value of
 * n->recover selects one of three actions:
 *
 *  - a known database-file state: the node referenced by
 *    self.id.parent (when tracked and distinct from the node itself)
 *    is flagged with SI_RDB_REMOVE;
 *  - SI_RDB_DBSEAL alone: either the node or its tracked parent is
 *    flagged with SI_RDB_REMOVE, and a node that stays unflagged is
 *    completed through si_nodecomplete() and switched to SI_RDB;
 *  - anything else: reported through sr_malfunction().
 *
 * buf is reset on entry and is not used otherwise.
 *
 * Returns 0 after a full walk, -1 when si_nodecomplete() fails, or
 * the value of sr_malfunction() for an unexpected state.
 */
static inline int
si_trackvalidate(sitrack *track, ssbuf *buf, sr *r, si *i)
{
	ssrbnode *pos;

	ss_bufreset(buf);
	for (pos = ss_rbmax(&track->i); pos; pos = ss_rbprev(&track->i, pos))
	{
		sinode *cur = sscast(pos, sinode, node);

		switch (cur->recover) {
		case SI_RDB|SI_RDB_DBI|SI_RDB_DBSEAL|SI_RDB_REMOVE:
		case SI_RDB|SI_RDB_DBSEAL|SI_RDB_REMOVE:
		case SI_RDB|SI_RDB_REMOVE:
		case SI_RDB_UNDEF|SI_RDB_DBSEAL|SI_RDB_REMOVE:
		case SI_RDB|SI_RDB_DBI|SI_RDB_DBSEAL:
		case SI_RDB|SI_RDB_DBI:
		case SI_RDB:
		case SI_RDB|SI_RDB_DBSEAL:
		case SI_RDB_UNDEF|SI_RDB_DBSEAL:
		{
			/* a tracked ancestor other than the node itself is a
			 * leftover: schedule it for removal */
			sinode *stale = si_trackget(track, cur->self.id.parent);
			if (stale != NULL && stale != cur)
				stale->recover |= SI_RDB_REMOVE;
			break;
		}
		case SI_RDB_DBSEAL:
		{
			/* look up the parent; when it has SI_RDB_DBI set the
			 * node itself is scheduled for removal, otherwise the
			 * parent is */
			sinode *origin = si_trackget(track, cur->self.id.parent);
			if (origin != NULL) {
				sinode *victim =
					(origin->recover & SI_RDB_DBI) ? cur : origin;
				victim->recover |= SI_RDB_REMOVE;
			}
			/* nothing more to do for a node scheduled for removal */
			if (cur->recover & SI_RDB_REMOVE)
				break;
			/* otherwise complete the node */
			int rc = si_nodecomplete(cur, r, i->scheme);
			if (ssunlikely(rc == -1))
				return -1;
			cur->recover = SI_RDB;
			break;
		}
		default:
			/* any other combination is treated as corruption */
			return sr_malfunction(r->e, "corrupted database repository: %s",
			                      i->scheme->path);
		}
	}
	return 0;
}
/*
 * Create a brand new node whose single branch holds one empty page.
 *
 * The node is allocated and created with a fresh id (sequence
 * SR_NSNNEXT) whose parent field is `parent`. The function then writes,
 * in this order: a seal, the empty page, the index and the closing
 * seal. When the scheme storage is SI_SIN_MEMORY the node's branch
 * blob (n->self.copy) is passed to every write call and the node is
 * marked in_memory. When the scheme enables amqf a filter is prepared
 * and handed to sd_indexcommit(). When the scheme enables mmap the
 * node is mapped with si_nodemap().
 *
 * Returns the new node, or NULL on any failure; on failure the node is
 * released with si_nodefree(n, r, 0).
 */
sinode *si_bootstrap(si *i, uint64_t parent)
{
	sr *r = i->r;
	int status;

	/* allocate the node object */
	sinode *node = si_nodenew(r);
	if (ssunlikely(node == NULL))
		return NULL;

	sdid node_id = {
		.parent = parent,
		.flags  = 0,
		.id     = sr_seq(r->seq, SR_NSNNEXT)
	};
	status = si_nodecreate(node, r, i->scheme, &node_id);
	if (ssunlikely(status == -1))
		goto fail_node;
	node->branch = &node->self;
	node->branch_count++;

	/* in-memory storage: writes additionally receive the branch blob */
	ssblob *mem = NULL;
	if (i->scheme->storage == SI_SIN_MEMORY) {
		mem = &node->self.copy;
		status = ss_blobensure(mem, 4096);
		if (ssunlikely(status == -1))
			goto fail_node;
		node->in_memory = 1;
	}

	/* start an index that will describe a single empty page */
	sdindex page_index;
	sd_indexinit(&page_index);
	status = sd_indexbegin(&page_index, r);
	if (ssunlikely(status == -1))
		goto fail_node;

	ssqf filter;
	ssqf *filter_ref = NULL;
	ss_qfinit(&filter);

	sdbuild builder;
	sd_buildinit(&builder);
	status = sd_buildbegin(&builder, r,
	                       i->scheme->node_page_checksum,
	                       i->scheme->compression_key,
	                       i->scheme->compression,
	                       i->scheme->compression_if);
	if (ssunlikely(status == -1))
		goto fail_build;
	sd_buildend(&builder, r);
	status = sd_indexadd(&page_index, r, &builder, sizeof(sdseal));
	if (ssunlikely(status == -1))
		goto fail_build;

	/* seal goes first; remember the file size seen before writing it */
	uint64_t seal_offset = node->file.size;
	status = sd_writeseal(r, &node->file, mem);
	if (ssunlikely(status == -1))
		goto fail_build;

	/* page */
	status = sd_writepage(r, &node->file, mem, &builder);
	if (ssunlikely(status == -1))
		goto fail_build;

	/* optional amqf filter, passed to the index commit */
	if (i->scheme->amqf) {
		status = ss_qfensure(&filter, r->a, 0);
		if (ssunlikely(status == -1))
			goto fail_build;
		filter_ref = &filter;
	}
	status = sd_indexcommit(&page_index, r, &node_id, filter_ref,
	                        node->file.size);
	if (ssunlikely(status == -1))
		goto fail_build;
	ss_qffree(&filter, r->a);

	/* index */
	status = sd_writeindex(r, &node->file, mem, &page_index);
	if (ssunlikely(status == -1))
		goto fail_build;

	/* close the seal written above */
	status = sd_seal(r, &node->file, mem, &page_index, seal_offset);
	if (ssunlikely(status == -1))
		goto fail_build;

	if (mem) {
		status = ss_blobfit(mem);
		if (ssunlikely(status == -1))
			goto fail_build;
	}
	if (i->scheme->mmap) {
		status = si_nodemap(node, r);
		if (ssunlikely(status == -1))
			goto fail_build;
	}

	/* attach the index to the branch and drop the build state */
	si_branchset(&node->self, &page_index);
	sd_buildcommit(&builder, r);
	sd_buildfree(&builder, r);
	return node;

fail_build:
	ss_qffree(&filter, r->a);
	sd_indexfree(&page_index, r);
	sd_buildfree(&builder, r);
fail_node:
	si_nodefree(node, r, 0);
	return NULL;
}

/*
 * Deploy a new index repository.
 *
 * Optionally creates the scheme directory (mode 0755), deploys the
 * scheme file, then bootstraps, completes and registers the initial
 * node. The index size is set to the size of that node.
 *
 * Returns 1 on success and -1 on failure.
 */
static inline int
si_deploy(si *i, sr *r, int create_directory)
{
	int status;

	/* repository directory */
	if (sslikely(create_directory)) {
		status = ss_vfsmkdir(r->vfs, i->scheme->path, 0755);
		if (ssunlikely(status == -1)) {
			sr_malfunction(r->e, "directory '%s' create error: %s",
			               i->scheme->path, strerror(errno));
			return -1;
		}
	}

	/* scheme file */
	status = si_schemedeploy(i->scheme, r);
	if (ssunlikely(status == -1)) {
		sr_malfunction_set(r->e);
		return -1;
	}

	/* initial node, created without a parent (parent id 0) */
	sinode *first = si_bootstrap(i, 0);
	if (ssunlikely(first == NULL))
		return -1;
	SS_INJECTION(r->i, SS_INJECTION_SI_RECOVER_0,
	             si_nodefree(first, r, 0);
	             sr_malfunction(r->e, "%s", "error injection");
	             return -1);
	status = si_nodecomplete(first, r, i->scheme);
	if (ssunlikely(status == -1)) {
		si_nodefree(first, r, 1);
		return -1;
	}

	/* register the node with the index and the planner */
	si_insert(i, first);
	si_plannerupdate(&i->p, SI_COMPACT|SI_BRANCH|SI_TEMP, first);
	i->size = si_nodesize(first);
	return 1;
}
/*
 * Split a merge stream into a sequence of new nodes.
 *
 * Every successful sd_merge() step produces one node: the node is
 * allocated, given a fresh id whose parent is parent->self.id.id,
 * created from the merge index and the shared build context, and its
 * pointer is appended to `result`.
 *
 * index        index being compacted (provides scheme, sequence, allocator)
 * c            compaction context; c->build and c->update are used
 * result       buffer that receives the sinode* of every created node
 * parent       node being split
 * i            source iterator handed to the merge
 * size_node    node size limit passed to the merge configuration
 * size_stream  stream size passed to the merge configuration
 * vlsn         vlsn passed to the merge configuration
 *
 * Returns 0 on success. On failure returns -1 after releasing every
 * node already stored in `result` (si_splitfree) and the merge state
 * (sd_mergefree); a node that was allocated but not yet stored in
 * `result` is released here as well.
 */
static inline int
si_split(si *index, sdc *c, ssbuf *result,
         sinode   *parent,
         ssiter   *i,
         uint64_t  size_node,
         uint32_t  size_stream,
         uint64_t  vlsn)
{
	sr *r = index->r;
	int rc;
	sdmergeconf mergeconf = {
		.size_stream     = size_stream,
		.size_node       = size_node,
		.size_page       = index->scheme->node_page_size,
		.checksum        = index->scheme->node_page_checksum,
		.compression     = index->scheme->compression,
		.compression_key = index->scheme->compression_key,
		.offset          = 0,
		.vlsn            = vlsn,
		.save_delete     = 0,
		.save_update     = 0
	};
	sdmerge merge;
	sd_mergeinit(&merge, r, i, &c->build, &c->update, &mergeconf);
	while ((rc = sd_merge(&merge)) > 0)
	{
		sinode *n = si_nodenew(r);
		if (ssunlikely(n == NULL))
			goto error;
		sdid id = {
			.parent = parent->self.id.id,
			.flags  = 0,
			.id     = sr_seq(index->r->seq, SR_NSNNEXT)
		};
		rc = sd_mergecommit(&merge, &id);
		if (ssunlikely(rc == -1)) {
			/* n is not in result yet, so si_splitfree() below
			 * would not release it */
			si_nodefree(n, r, 0);
			goto error;
		}
		rc = si_nodecreate(n, r, index->scheme, &id, &merge.index, &c->build);
		if (ssunlikely(rc == -1)) {
			/* same ownership gap as above; gc is requested in case
			 * the create step left anything behind */
			si_nodefree(n, r, 1);
			goto error;
		}
		rc = ss_bufadd(result, index->r->a, &n, sizeof(sinode*));
		if (ssunlikely(rc == -1)) {
			sr_oom_malfunction(index->r->e);
			si_nodefree(n, r, 1);
			goto error;
		}
		sd_buildreset(&c->build);
	}
	/* the loop also ends when sd_merge() itself reports an error */
	if (ssunlikely(rc == -1))
		goto error;
	return 0;
error:
	si_splitfree(result, r);
	sd_mergefree(&merge);
	return -1;
}

/*
 * Compact `node` by replacing it with the nodes produced from `stream`.
 *
 * Steps, in order:
 *  1. si_split() turns the stream into zero or more new nodes stored
 *     in c->a;
 *  2. when the split produced nothing and the index holds exactly one
 *     node, an empty node is bootstrapped so the removal becomes a
 *     single node update;
 *  3. under the index lock the old node is removed from the planner and
 *     then deleted (0 new nodes), replaced (1) or replaced and
 *     extended by insertions (more than 1); the new nodes are locked;
 *  4. outside the lock the new nodes are optionally synced and sealed,
 *     the old node is freed with gc, and the new nodes are completed;
 *  5. under the index lock again, the new nodes are unlocked.
 *
 * Returns 0 on success and -1 on failure.
 */
int si_compaction(si *index, sdc *c, uint64_t vlsn,
                  sinode *node,
                  ssiter *stream, uint32_t size_stream)
{
	sr *r = index->r;
	ssbuf *result = &c->a;
	ssiter i;

	/* begin compaction.
	 *
	 * split merge stream into a number
	 * of new nodes.
	 */
	int rc;
	rc = si_split(index, c, result,
	              node, stream,
	              index->scheme->node_size,
	              size_stream,
	              vlsn);
	if (ssunlikely(rc == -1))
		return -1;

	SS_INJECTION(r->i, SS_INJECTION_SI_COMPACTION_0,
	             si_splitfree(result, r);
	             sr_malfunction(r->e, "%s", "error injection");
	             return -1);

	/* mask removal of a single node as a
	 * single node update */
	int count = ss_bufused(result) / sizeof(sinode*);
	int count_index;

	si_lock(index);
	count_index = index->n;
	si_unlock(index);

	sinode *n;
	if (ssunlikely(count == 0 && count_index == 1))
	{
		n = si_bootstrap(index, node->self.id.id);
		if (ssunlikely(n == NULL))
			return -1;
		rc = ss_bufadd(result, r->a, &n, sizeof(sinode*));
		if (ssunlikely(rc == -1)) {
			sr_oom_malfunction(r->e);
			si_nodefree(n, r, 1);
			return -1;
		}
		count++;
	}

	/* commit compaction changes */
	si_lock(index);
	svindex *j = si_nodeindex(node);
	si_plannerremove(&index->p, SI_COMPACT|SI_BRANCH, node);
	switch (count) {
	case 0: /* delete */
		si_remove(index, node);
		si_redistribute_index(index, r, c, node);
		uint32_t used = sv_indexused(j);
		if (used) {
			ss_quota(r->quota, SS_QREMOVE, used);
		}
		break;
	case 1: /* self update */
		/* the only new node takes over the old in-memory index */
		n = *(sinode**)result->s;
		n->i0   = *j;
		n->used = sv_indexused(j);
		si_nodelock(n);
		si_replace(index, node, n);
		si_plannerupdate(&index->p, SI_COMPACT|SI_BRANCH, n);
		break;
	default: /* split */
		rc = si_redistribute(index, r, c, node, result);
		if (ssunlikely(rc == -1)) {
			si_unlock(index);
			si_splitfree(result, r);
			return -1;
		}
		/* the first new node replaces the old one, the rest
		 * are inserted */
		ss_iterinit(ss_bufiterref, &i);
		ss_iteropen(ss_bufiterref, &i, result, sizeof(sinode*));
		n = ss_iterof(ss_bufiterref, &i);
		n->used = sv_indexused(&n->i0);
		si_nodelock(n);
		si_replace(index, node, n);
		si_plannerupdate(&index->p, SI_COMPACT|SI_BRANCH, n);
		for (ss_iternext(ss_bufiterref, &i); ss_iterhas(ss_bufiterref, &i);
		     ss_iternext(ss_bufiterref, &i)) {
			n = ss_iterof(ss_bufiterref, &i);
			n->used = sv_indexused(&n->i0);
			si_nodelock(n);
			si_insert(index, n);
			si_plannerupdate(&index->p, SI_COMPACT|SI_BRANCH, n);
		}
		break;
	}
	sv_indexinit(j);
	si_unlock(index);

	/* compaction completion */

	/* seal nodes */
	ss_iterinit(ss_bufiterref, &i);
	ss_iteropen(ss_bufiterref, &i, result, sizeof(sinode*));
	while (ss_iterhas(ss_bufiterref, &i))
	{
		n = ss_iterof(ss_bufiterref, &i);
		if (index->scheme->sync) {
			rc = si_nodesync(n, r);
			if (ssunlikely(rc == -1))
				return -1;
		}
		rc = si_nodeseal(n, r, index->scheme);
		if (ssunlikely(rc == -1))
			return -1;
		SS_INJECTION(r->i, SS_INJECTION_SI_COMPACTION_3,
		             si_nodefree(node, r, 0);
		             sr_malfunction(r->e, "%s", "error injection");
		             return -1);
		ss_iternext(ss_bufiterref, &i);
	}

	SS_INJECTION(r->i, SS_INJECTION_SI_COMPACTION_1,
	             si_nodefree(node, r, 0);
	             sr_malfunction(r->e, "%s", "error injection");
	             return -1);

	/* gc old node */
	rc = si_nodefree(node, r, 1);
	if (ssunlikely(rc == -1))
		return -1;

	SS_INJECTION(r->i, SS_INJECTION_SI_COMPACTION_2,
	             sr_malfunction(r->e, "%s", "error injection");
	             return -1);

	/* complete new nodes */
	ss_iterinit(ss_bufiterref, &i);
	ss_iteropen(ss_bufiterref, &i, result, sizeof(sinode*));
	while (ss_iterhas(ss_bufiterref, &i))
	{
		n = ss_iterof(ss_bufiterref, &i);
		rc = si_nodecomplete(n, r, index->scheme);
		if (ssunlikely(rc == -1))
			return -1;
		SS_INJECTION(r->i, SS_INJECTION_SI_COMPACTION_4,
		             sr_malfunction(r->e, "%s", "error injection");
		             return -1);
		ss_iternext(ss_bufiterref, &i);
	}

	/* unlock */
	si_lock(index);
	ss_iterinit(ss_bufiterref, &i);
	ss_iteropen(ss_bufiterref, &i, result, sizeof(sinode*));
	while (ss_iterhas(ss_bufiterref, &i))
	{
		n = ss_iterof(ss_bufiterref, &i);
		si_nodeunlock(n);
		ss_iternext(ss_bufiterref, &i);
	}
	si_unlock(index);
	return 0;
}
/*
 * Create a new node that contains a single empty page.
 *
 * A node is allocated, an index and a build are started, the empty
 * build is added to the index, both are committed, and the node is
 * created from them with a fresh id (sequence SR_NSNNEXT) whose parent
 * field is `parent`.
 *
 * Returns the new node, or NULL on failure. Every failure path
 * releases the node; failures after the build was started also release
 * the index and the build.
 */
sinode *si_bootstrap(si *i, sr *r, uint32_t parent)
{
	sinode *n = si_nodenew(r);
	if (srunlikely(n == NULL))
		return NULL;
	sdid id = {
		.parent = parent,
		.flags  = 0,
		.id     = sr_seq(r->seq, SR_NSNNEXT)
	};
	sdindex index;
	sd_indexinit(&index);
	int rc = sd_indexbegin(&index, r, 0);
	if (srunlikely(rc == -1)) {
		si_nodefree(n, r, 0);
		return NULL;
	}
	sdbuild build;
	sd_buildinit(&build);
	rc = sd_buildbegin(&build, r,
	                   i->conf->node_page_checksum,
	                   i->conf->compression);
	if (srunlikely(rc == -1))
		goto error;
	sd_buildend(&build, r);
	rc = sd_indexadd(&index, r, &build);
	if (srunlikely(rc == -1))
		goto error; /* the build must be released here as well */
	sd_buildcommit(&build);
	sd_indexcommit(&index, r, &id);
	rc = si_nodecreate(n, r, i->conf, &id, &index, &build);
	/* the build is no longer needed whether or not the create worked */
	sd_buildfree(&build, r);
	if (srunlikely(rc == -1)) {
		/* NOTE(review): the index is not released on this path;
		 * confirm whether si_nodecreate() owns it on failure */
		si_nodefree(n, r, 1);
		return NULL;
	}
	return n;
error:
	sd_indexfree(&index, r);
	sd_buildfree(&build, r);
	si_nodefree(n, r, 0);
	return NULL;
}

/*
 * Deploy a new index repository.
 *
 * Creates the directory named by i->conf->path, bootstraps the initial
 * node with parent id 0, completes it and registers it with the index
 * and the planner.
 *
 * Returns 1 on success and -1 on failure.
 */
static inline int
si_deploy(si *i, sr *r)
{
	int rc = sr_filemkdir(i->conf->path);
	if (srunlikely(rc == -1)) {
		sr_malfunction(r->e, "directory '%s' create error: %s",
		               i->conf->path, strerror(errno));
		return -1;
	}
	sinode *n = si_bootstrap(i, r, 0);
	if (srunlikely(n == NULL))
		return -1;
	SR_INJECTION(r->i, SR_INJECTION_SI_RECOVER_0,
	             si_nodefree(n, r, 0);
	             sr_malfunction(r->e, "%s", "error injection");
	             return -1);
	rc = si_nodecomplete(n, r, i->conf);
	if (srunlikely(rc == -1)) {
		si_nodefree(n, r, 1);
		return -1;
	}
	si_insert(i, r, n);
	si_plannerupdate(&i->p, SI_COMPACT|SI_BRANCH, n);
	return 1;
}