/*
 * Rebuild the branch list of node `n` from a snapshot record.
 *
 * The snapshot node header `sn` is immediately followed in memory by
 * sn->branch_count index headers, each occupying sd_indexsize_ext()
 * bytes.  The first index is attached to the node's embedded branch
 * (n->self); every following one gets a branch from si_branchnew().
 * Each branch is pushed onto the head of n->branch and counted in
 * n->branch_count.
 *
 * Returns 0 on success, -1 if a branch could not be allocated or an
 * index could not be copied.  Branches that were already linked stay
 * attached to the node on failure.
 */
static inline int
si_noderecover_snapshot(sinode *n, sr *r, sdsnapshotnode *sn)
{
	char *p = (char*)sn + sizeof(sdsnapshotnode);
	uint32_t i = 0;
	int first = 1;
	int rc;
	while (i < sn->branch_count) {
		sdindexheader *h = (sdindexheader*)p;
		sibranch *b;
		if (first) {
			b = &n->self;
		} else {
			b = si_branchnew(r);
			if (ssunlikely(b == NULL))
				return -1;
		}
		sdindex index;
		sd_indexinit(&index);
		rc = sd_indexcopy(&index, r, h);
		if (ssunlikely(rc == -1)) {
			/* a freshly allocated branch is not linked into
			 * n->branch yet, so it must be released here;
			 * n->self is embedded in the node and is left alone */
			if (!first)
				si_branchfree(b, r);
			return -1;
		}
		si_branchset(b, &index);

		/* push to the head of the node branch list */
		b->next = n->branch;
		n->branch = b;
		n->branch_count++;
		first = 0;

		/* step over this index to the next header */
		p += sd_indexsize_ext(h);
		i++;
	}
	return 0;
}
/*
 * Collect statistics over every node of the index referenced by p->i.
 *
 * Nodes are visited via ss_rbmin()/ss_rbnext().  For each node the
 * in-memory index counters (i0, i1) and the on-disk index header
 * fields are added to the profiler totals.  Memory usage is summed
 * locally and stored once the walk is finished, together with the
 * index read counters.
 *
 * Always returns 0.
 */
int si_profiler(siprofiler *p)
{
	uint64_t mem_total = 0;
	ssrbnode *cur;

	for (cur = ss_rbmin(&p->i->i); cur; cur = ss_rbnext(&p->i->i, cur)) {
		sinode *nd = sscast(cur, sinode, node);

		p->total_node_count++;

		/* in-memory indexes */
		p->count += nd->i0.count;
		p->count += nd->i1.count;
		mem_total += nd->i0.used;
		mem_total += nd->i1.used;

		/* on-disk index */
		p->count += nd->index.h->keys;
		p->count_dup += nd->index.h->dupkeys;

		int hdr_size = sd_indexsize_ext(nd->index.h);
		p->total_node_size += hdr_size + nd->index.h->total;
		p->total_node_origin_size += hdr_size + nd->index.h->totalorigin;
		p->total_page_count += nd->index.h->count;
	}

	p->memory_used = mem_total;
	p->read_disk = p->i->read_disk;
	p->read_cache = p->i->read_cache;
	return 0;
}
/*
 * Collect statistics over every node of the index referenced by p->i.
 *
 * For each node (visited via ss_rbmin()/ss_rbnext()) this updates:
 *   - temperature min/max and a running temperature sum,
 *   - node, branch and key counters,
 *   - the branch-count histogram (slots 0..19, plus a "20 and more"
 *     bucket),
 *   - in-memory index usage,
 *   - per-branch index sizes, page counts and AMQF sizes.
 *
 * Averages are computed only when at least one node was seen.  The
 * histogram helpers are invoked last.
 *
 * Always returns 0.
 */
int si_profiler(siprofiler *p)
{
	uint32_t temp_sum = 0;
	uint64_t mem_total = 0;
	ssrbnode *cur;

	for (cur = ss_rbmin(&p->i->i); cur; cur = ss_rbnext(&p->i->i, cur)) {
		sinode *nd = sscast(cur, sinode, node);

		/* temperature */
		if (p->temperature_max < nd->temperature) {
			p->temperature_max = nd->temperature;
		}
		if (p->temperature_min > nd->temperature) {
			p->temperature_min = nd->temperature;
		}
		temp_sum += nd->temperature;

		/* node and branch counters */
		p->total_node_count++;
		p->count += nd->i0.count;
		p->count += nd->i1.count;
		p->total_branch_count += nd->branch_count;
		if (p->total_branch_max < nd->branch_count) {
			p->total_branch_max = nd->branch_count;
		}
		if (nd->branch_count >= 20) {
			p->histogram_branch_20plus++;
		} else {
			p->histogram_branch[nd->branch_count]++;
		}

		/* in-memory indexes */
		mem_total += nd->i0.used;
		mem_total += nd->i1.used;

		/* branches */
		sibranch *br;
		for (br = nd->branch; br; br = br->next) {
			p->count += br->index.h->keys;
			p->count_dup += br->index.h->dupkeys;

			int hdr_size = sd_indexsize_ext(br->index.h);
			p->total_snapshot_size += hdr_size;
			p->total_node_size += hdr_size + br->index.h->total;
			p->total_node_origin_size += hdr_size + br->index.h->totalorigin;
			p->total_page_count += br->index.h->count;

			if (br->index.h->extensions & SD_INDEXEXT_AMQF) {
				p->total_amqf_size +=
					sizeof(sdindexamqf) + sd_indexamqf(&br->index)->size;
			}
		}
	}

	/* averages are undefined for an empty index; skip the division */
	if (p->total_node_count > 0) {
		p->total_branch_avg = p->total_branch_count / p->total_node_count;
		p->temperature_avg = temp_sum / p->total_node_count;
	}

	p->memory_used = mem_total;
	p->read_disk = p->i->read_disk;
	p->read_cache = p->i->read_cache;

	si_profiler_histogram_branch(p);
	si_profiler_histogram_temperature(p);
	return 0;
}
/*
 * Copy an index into the buffer owned by `i`.
 *
 * sd_indexsize_ext(h) bytes are copied starting at `h` itself.  The
 * destination buffer is grown with the allocator r->a first; after the
 * copy the buffer is advanced and i->h is refreshed with
 * sd_indexheader().
 *
 * Returns 0 on success, or the result of sr_oom() when the buffer
 * cannot be grown.
 */
int sd_indexcopy(sdindex *i, sr *r, sdindexheader *h)
{
	int nbytes = sd_indexsize_ext(h);

	if (ssunlikely(ss_bufensure(&i->i, r->a, nbytes) == -1)) {
		return sr_oom(r->e);
	}

	memcpy(i->i.s, (char*)h, nbytes);
	ss_bufadvance(&i->i, nbytes);
	i->h = sd_indexheader(i);
	return 0;
}
/*
 * Copy an index into the buffer owned by `i`.
 *
 * The copied region does not begin at `h`: it begins
 * (h->align + h->size + h->extension) bytes before it, and spans
 * sd_indexsize_ext(h) bytes.  The destination buffer is grown with
 * the allocator r->a first; after the copy the buffer is advanced and
 * i->h is refreshed with sd_indexheader().
 *
 * Returns 0 on success, or the result of sr_oom() when the buffer
 * cannot be grown.
 */
int sd_indexcopy(sdindex *i, sr *r, sdindexheader *h)
{
	int nbytes = sd_indexsize_ext(h);

	if (ssunlikely(ss_bufensure(&i->i, r->a, nbytes) == -1)) {
		return sr_oom(r->e);
	}

	/* the data to copy precedes the header in memory */
	char *origin = (char*)h - (h->align + h->size + h->extension);

	memcpy(i->i.s, origin, nbytes);
	ss_bufadvance(&i->i, nbytes);
	i->h = sd_indexheader(i);
	return 0;
}
static inline int si_branchcreate(si *index, sdc *c, sinode *parent, svindex *vindex, uint64_t vlsn, sibranch **result) { sr *r = &index->r; sibranch *branch = NULL; /* in-memory mode blob */ int rc; ssblob copy, *blob = NULL; if (parent->in_memory) { ss_blobinit(©, r->vfs); rc = ss_blobensure(©, 10ULL * 1024 * 1024); if (ssunlikely(rc == -1)) return sr_oom_malfunction(r->e); blob = © } svmerge vmerge; sv_mergeinit(&vmerge); rc = sv_mergeprepare(&vmerge, r, 1); if (ssunlikely(rc == -1)) return -1; svmergesrc *s = sv_mergeadd(&vmerge, NULL); ss_iterinit(sv_indexiter, &s->src); ss_iteropen(sv_indexiter, &s->src, r, vindex, SS_GTE, NULL, 0); ssiter i; ss_iterinit(sv_mergeiter, &i); ss_iteropen(sv_mergeiter, &i, r, &vmerge, SS_GTE); /* merge iter is not used */ uint32_t timestamp = ss_timestamp(); sdmergeconf mergeconf = { .stream = vindex->count, .size_stream = UINT32_MAX, .size_node = UINT64_MAX, .size_page = index->scheme.node_page_size, .checksum = index->scheme.node_page_checksum, .expire = index->scheme.expire, .timestamp = timestamp, .compression_key = index->scheme.compression_key, .compression = index->scheme.compression_branch, .compression_if = index->scheme.compression_branch_if, .amqf = index->scheme.amqf, .vlsn = vlsn, .vlsn_lru = 0, .save_delete = 1, .save_upsert = 1, .save_set = 1 }; sdmerge merge; rc = sd_mergeinit(&merge, r, &i, &c->build, &c->qf, &c->upsert, &mergeconf); if (ssunlikely(rc == -1)) return -1; while ((rc = sd_merge(&merge)) > 0) { assert(branch == NULL); /* write open seal */ uint64_t seal = parent->file.size; rc = sd_writeseal(r, &parent->file, blob); if (ssunlikely(rc == -1)) goto e0; /* write pages */ uint64_t offset = parent->file.size; while ((rc = sd_mergepage(&merge, offset)) == 1) { rc = sd_writepage(r, &parent->file, blob, merge.build); if (ssunlikely(rc == -1)) goto e0; offset = parent->file.size; } if (ssunlikely(rc == -1)) goto e0; sdid id = { .parent = parent->self.id.id, .flags = SD_IDBRANCH, .id = sr_seq(r->seq, 
SR_NSNNEXT) }; rc = sd_mergecommit(&merge, &id, parent->file.size); if (ssunlikely(rc == -1)) goto e0; /* write index */ rc = sd_writeindex(r, &parent->file, blob, &merge.index); if (ssunlikely(rc == -1)) goto e0; if (index->scheme.sync) { rc = ss_filesync(&parent->file); if (ssunlikely(rc == -1)) { sr_malfunction(r->e, "file '%s' sync error: %s", ss_pathof(&parent->file.path), strerror(errno)); goto e0; } } SS_INJECTION(r->i, SS_INJECTION_SI_BRANCH_0, sd_mergefree(&merge); sr_malfunction(r->e, "%s", "error injection"); return -1); /* seal the branch */ rc = sd_seal(r, &parent->file, blob, &merge.index, seal); if (ssunlikely(rc == -1)) goto e0; if (index->scheme.sync == 2) { rc = ss_filesync(&parent->file); if (ssunlikely(rc == -1)) { sr_malfunction(r->e, "file '%s' sync error: %s", ss_pathof(&parent->file.path), strerror(errno)); goto e0; } } /* create new branch object */ branch = si_branchnew(r); if (ssunlikely(branch == NULL)) goto e0; si_branchset(branch, &merge.index); } sv_mergefree(&vmerge, r->a); if (ssunlikely(rc == -1)) { sr_oom_malfunction(r->e); goto e0; } /* in case of expire, branch may not be created if there * are no keys left */ if (ssunlikely(branch == NULL)) return 0; /* in-memory mode support */ if (blob) { rc = ss_blobfit(blob); if (ssunlikely(rc == -1)) { ss_blobfree(blob); goto e1; } branch->copy = copy; } /* mmap support */ if (index->scheme.mmap) { ss_mmapinit(&parent->map_swap); rc = ss_vfsmmap(r->vfs, &parent->map_swap, parent->file.fd, parent->file.size, 1); if (ssunlikely(rc == -1)) { sr_malfunction(r->e, "db file '%s' mmap error: %s", ss_pathof(&parent->file.path), strerror(errno)); goto e1; } } *result = branch; return 0; e0: sd_mergefree(&merge); if (blob) ss_blobfree(blob); sv_mergefree(&vmerge, r->a); return -1; e1: si_branchfree(branch, r); return -1; } int si_branch(si *index, sdc *c, siplan *plan, uint64_t vlsn) { sr *r = &index->r; sinode *n = plan->node; assert(n->flags & SI_LOCK); si_lock(index); if (ssunlikely(n->used == 
0)) { si_nodeunlock(n); si_unlock(index); return 0; } svindex *i; i = si_noderotate(n); si_unlock(index); sibranch *branch = NULL; int rc = si_branchcreate(index, c, n, i, vlsn, &branch); if (ssunlikely(rc == -1)) return -1; if (ssunlikely(branch == NULL)) { si_lock(index); uint32_t used = sv_indexused(i); n->used -= used; ss_quota(r->quota, SS_QREMOVE, used); svindex swap = *i; si_nodeunrotate(n); si_nodeunlock(n); si_plannerupdate(&index->p, SI_BRANCH|SI_COMPACT, n); si_unlock(index); si_nodegc_index(r, &swap); return 0; } /* commit */ si_lock(index); branch->next = n->branch; n->branch->link = branch; n->branch = branch; n->branch_count++; uint32_t used = sv_indexused(i); n->used -= used; ss_quota(r->quota, SS_QREMOVE, used); index->size += sd_indexsize_ext(branch->index.h) + sd_indextotal(&branch->index); svindex swap = *i; si_nodeunrotate(n); si_nodeunlock(n); si_plannerupdate(&index->p, SI_BRANCH|SI_COMPACT, n); ssmmap swap_map = n->map; n->map = n->map_swap; memset(&n->map_swap, 0, sizeof(n->map_swap)); si_unlock(index); /* gc */ if (index->scheme.mmap) { int rc = ss_vfsmunmap(r->vfs, &swap_map); if (ssunlikely(rc == -1)) { sr_malfunction(r->e, "db file '%s' munmap error: %s", ss_pathof(&n->file.path), strerror(errno)); return -1; } } si_nodegc_index(r, &swap); return 1; } int si_compact(si *index, sdc *c, siplan *plan, uint64_t vlsn, uint64_t vlsn_lru, ssiter *vindex, uint64_t vindex_used) { sr *r = &index->r; sinode *node = plan->node; assert(node->flags & SI_LOCK); /* prepare for compaction */ int rc; rc = sd_censure(c, r, node->branch_count); if (ssunlikely(rc == -1)) return sr_oom_malfunction(r->e); svmerge merge; sv_mergeinit(&merge); rc = sv_mergeprepare(&merge, r, node->branch_count + 1); if (ssunlikely(rc == -1)) return -1; /* read node file into memory */ int use_mmap = index->scheme.mmap; ssmmap *map = &node->map; ssmmap preload; if (index->scheme.node_compact_load) { rc = si_noderead(node, r, &c->c); if (ssunlikely(rc == -1)) return -1; preload.p 
= c->c.s; preload.size = ss_bufused(&c->c); map = &preload; use_mmap = 1; } /* include vindex into merge process */ svmergesrc *s; uint32_t count = 0; uint64_t size_stream = 0; if (vindex) { s = sv_mergeadd(&merge, vindex); size_stream = vindex_used; } sdcbuf *cbuf = c->head; sibranch *b = node->branch; while (b) { s = sv_mergeadd(&merge, NULL); /* choose compression type */ int compression; ssfilterif *compression_if; if (! si_branchis_root(b)) { compression = index->scheme.compression_branch; compression_if = index->scheme.compression_branch_if; } else { compression = index->scheme.compression; compression_if = index->scheme.compression_if; } sdreadarg arg = { .index = &b->index, .buf = &cbuf->a, .buf_xf = &cbuf->b, .buf_read = &c->d, .index_iter = &cbuf->index_iter, .page_iter = &cbuf->page_iter, .use_memory = node->in_memory, .use_mmap = use_mmap, .use_mmap_copy = 0, .use_compression = compression, .compression_if = compression_if, .has = 0, .has_vlsn = 0, .o = SS_GTE, .memory = &b->copy, .mmap = map, .file = &node->file, .r = r }; ss_iterinit(sd_read, &s->src); int rc = ss_iteropen(sd_read, &s->src, &arg, NULL, 0); if (ssunlikely(rc == -1)) return sr_oom_malfunction(r->e); size_stream += sd_indextotal(&b->index); count += sd_indexkeys(&b->index); cbuf = cbuf->next; b = b->next; } ssiter i; ss_iterinit(sv_mergeiter, &i); ss_iteropen(sv_mergeiter, &i, r, &merge, SS_GTE); rc = si_merge(index, c, node, vlsn, vlsn_lru, &i, size_stream, count); sv_mergefree(&merge, r->a); return rc; } int si_compact_index(si *index, sdc *c, siplan *plan, uint64_t vlsn, uint64_t vlsn_lru) { sinode *node = plan->node; si_lock(index); if (ssunlikely(node->used == 0)) { si_nodeunlock(node); si_unlock(index); return 0; } svindex *vindex; vindex = si_noderotate(node); si_unlock(index); uint64_t size_stream = sv_indexused(vindex); ssiter i; ss_iterinit(sv_indexiter, &i); ss_iteropen(sv_indexiter, &i, &index->r, vindex, SS_GTE, NULL, 0); return si_compact(index, c, plan, vlsn, vlsn_lru, &i, 
size_stream); }