예제 #1
0
파일: si_node.c 프로젝트: ifzz/sophia
static inline int
si_nodeclose(sinode *n, sr *r, int gc)
{
	int rcret = 0;

	int rc = ss_vfsmunmap(r->vfs, &n->map);
	if (ssunlikely(rc == -1)) {
		sr_malfunction(r->e, "db file '%s' munmap error: %s",
		               ss_pathof(&n->file.path),
		               strerror(errno));
		rcret = -1;
	}
	rc = ss_fileclose(&n->file);
	if (ssunlikely(rc == -1)) {
		sr_malfunction(r->e, "db file '%s' close error: %s",
		               ss_pathof(&n->file.path),
		               strerror(errno));
		rcret = -1;
	}
	if (gc) {
		si_nodegc_index(r, &n->i0);
		si_nodegc_index(r, &n->i1);
	} else {
		sv_indexfree(&n->i0, r);
		sv_indexfree(&n->i1, r);
	}
	return rcret;
}
예제 #2
0
static inline int
ss_slabaclose(ssa *a)
{
	ssslaba *s = (ssslaba*)a->priv;
	ss_vfsmunmap(s->vfs, &s->pool);
	ss_spinlockfree(&s->lock);
	return 0;
}
예제 #3
0
static inline sibranch*
si_branchcreate(si *index, sdc *c, sinode *parent, svindex *vindex, uint64_t vlsn)
{
	sr *r = index->r;
	sibranch *branch = NULL;

	/* in-memory mode blob */
	int rc;
	ssblob copy, *blob = NULL;
	if (parent->in_memory) {
		ss_blobinit(&copy, r->vfs);
		rc = ss_blobensure(&copy, 10ULL * 1024 * 1024);
		if (ssunlikely(rc == -1)) {
			sr_oom_malfunction(r->e);
			return NULL;
		}
		blob = ©
	}

	svmerge vmerge;
	sv_mergeinit(&vmerge);
	rc = sv_mergeprepare(&vmerge, r, 1);
	if (ssunlikely(rc == -1))
		return NULL;
	svmergesrc *s = sv_mergeadd(&vmerge, NULL);
	ss_iterinit(sv_indexiter, &s->src);
	ss_iteropen(sv_indexiter, &s->src, r, vindex, SS_GTE, NULL, 0);
	ssiter i;
	ss_iterinit(sv_mergeiter, &i);
	ss_iteropen(sv_mergeiter, &i, r, &vmerge, SS_GTE);

	/* merge iter is not used */
	sdmergeconf mergeconf = {
		.size_stream     = UINT32_MAX,
		.size_node       = UINT64_MAX,
		.size_page       = index->scheme->node_page_size,
		.checksum        = index->scheme->node_page_checksum,
		.compression_key = index->scheme->compression_key,
		.compression     = index->scheme->compression_branch,
		.compression_if  = index->scheme->compression_branch_if,
		.vlsn            = vlsn,
		.vlsn_lru        = 0,
		.save_delete     = 1,
		.save_upsert     = 1
	};
	sdmerge merge;
	sd_mergeinit(&merge, r, &i, &c->build, &c->upsert, &mergeconf);

	while ((rc = sd_merge(&merge)) > 0)
	{
		assert(branch == NULL);

		/* write open seal */
		uint64_t seal = parent->file.size;
		rc = sd_writeseal(r, &parent->file, blob);
		if (ssunlikely(rc == -1))
			goto e0;

		/* write pages */
		uint64_t offset = parent->file.size;
		while ((rc = sd_mergepage(&merge, offset)) == 1)
		{
			rc = sd_writepage(r, &parent->file, blob, merge.build);
			if (ssunlikely(rc == -1))
				goto e0;
			offset = parent->file.size;
		}
		if (ssunlikely(rc == -1))
			goto e0;
		sdid id = {
			.parent = parent->self.id.id,
			.flags  = SD_IDBRANCH,
			.id     = sr_seq(r->seq, SR_NSNNEXT)
		};
		rc = sd_mergecommit(&merge, &id, parent->file.size);
		if (ssunlikely(rc == -1))
			goto e0;

		/* write index */
		rc = sd_writeindex(r, &parent->file, blob, &merge.index);
		if (ssunlikely(rc == -1))
			goto e0;
		if (index->scheme->sync) {
			rc = ss_filesync(&parent->file);
			if (ssunlikely(rc == -1)) {
				sr_malfunction(r->e, "file '%s' sync error: %s",
				               ss_pathof(&parent->file.path),
				               strerror(errno));
				goto e0;
			}
		}

		SS_INJECTION(r->i, SS_INJECTION_SI_BRANCH_0,
		             sd_mergefree(&merge);
		             sr_malfunction(r->e, "%s", "error injection");
		             return NULL);

		/* seal the branch */
		rc = sd_seal(r, &parent->file, blob, &merge.index, seal);
		if (ssunlikely(rc == -1))
			goto e0;
		if (index->scheme->sync == 2) {
			rc = ss_filesync(&parent->file);
			if (ssunlikely(rc == -1)) {
				sr_malfunction(r->e, "file '%s' sync error: %s",
				               ss_pathof(&parent->file.path),
				               strerror(errno));
				goto e0;
			}
		}

		/* create new branch object */
		branch = si_branchnew(r);
		if (ssunlikely(branch == NULL))
			goto e0;
		si_branchset(branch, &merge.index);
	}
	sv_mergefree(&vmerge, r->a);

	if (ssunlikely(rc == -1)) {
		sr_oom_malfunction(r->e);
		goto e0;
	}
	assert(branch != NULL);

	/* in-memory mode support */
	if (blob) {
		rc = ss_blobfit(blob);
		if (ssunlikely(rc == -1)) {
			ss_blobfree(blob);
			goto e1;
		}
		branch->copy = copy;
	}
	/* mmap support */
	if (index->scheme->mmap) {
		ss_mmapinit(&parent->map_swap);
		rc = ss_vfsmmap(r->vfs, &parent->map_swap, parent->file.fd,
		              parent->file.size, 1);
		if (ssunlikely(rc == -1)) {
			sr_malfunction(r->e, "db file '%s' mmap error: %s",
			               ss_pathof(&parent->file.path),
			               strerror(errno));
			goto e1;
		}
	}
	return branch;
e0:
	sd_mergefree(&merge);
	if (blob)
		ss_blobfree(blob);
	return NULL;
e1:
	si_branchfree(branch, r);
	return NULL;
}

int si_branch(si *index, sdc *c, siplan *plan, uint64_t vlsn)
{
	sr *r = index->r;
	sinode *n = plan->node;
	assert(n->flags & SI_LOCK);

	si_lock(index);
	if (ssunlikely(n->used == 0)) {
		si_nodeunlock(n);
		si_unlock(index);
		return 0;
	}
	svindex *i;
	i = si_noderotate(n);
	si_unlock(index);

	sibranch *branch = si_branchcreate(index, c, n, i, vlsn);
	if (ssunlikely(branch == NULL))
		return -1;

	/* commit */
	si_lock(index);
	branch->next = n->branch;
	n->branch->link = branch;
	n->branch = branch;
	n->branch_count++;
	uint32_t used = sv_indexused(i);
	n->used -= used;
	ss_quota(r->quota, SS_QREMOVE, used);
	index->size +=
		sd_indexsize(branch->index.h) +
		sd_indextotal(&branch->index);
	svindex swap = *i;
	si_nodeunrotate(n);
	si_nodeunlock(n);
	si_plannerupdate(&index->p, SI_BRANCH|SI_COMPACT, n);
	ssmmap swap_map = n->map;
	n->map = n->map_swap;
	memset(&n->map_swap, 0, sizeof(n->map_swap));
	si_unlock(index);

	/* gc */
	if (index->scheme->mmap) {
		int rc = ss_vfsmunmap(r->vfs, &swap_map);
		if (ssunlikely(rc == -1)) {
			sr_malfunction(r->e, "db file '%s' munmap error: %s",
			               ss_pathof(&n->file.path),
			               strerror(errno));
			return -1;
		}
	}
	si_nodegc_index(r, &swap);
	return 1;
}

int si_compact(si *index, sdc *c, siplan *plan,
               uint64_t vlsn,
               uint64_t vlsn_lru,
               ssiter *vindex,
               uint64_t vindex_used)
{
	sr *r = index->r;
	sinode *node = plan->node;
	assert(node->flags & SI_LOCK);

	/* prepare for compaction */
	int rc;
	rc = sd_censure(c, r, node->branch_count);
	if (ssunlikely(rc == -1))
		return sr_oom_malfunction(r->e);
	svmerge merge;
	sv_mergeinit(&merge);
	rc = sv_mergeprepare(&merge, r, node->branch_count + 1);
	if (ssunlikely(rc == -1))
		return -1;

	/* read node file into memory */
	int use_mmap = index->scheme->mmap;
	ssmmap *map = &node->map;
	ssmmap  preload;
	if (index->scheme->node_compact_load) {
		rc = si_noderead(node, r, &c->c);
		if (ssunlikely(rc == -1))
			return -1;
		preload.p = c->c.s;
		preload.size = ss_bufused(&c->c);
		map = &preload;
		use_mmap = 1;
	}

	/* include vindex into merge process */
	svmergesrc *s;
	uint64_t size_stream = 0;
	if (vindex) {
		s = sv_mergeadd(&merge, vindex);
		size_stream = vindex_used;
	}

	sdcbuf *cbuf = c->head;
	sibranch *b = node->branch;
	while (b) {
		s = sv_mergeadd(&merge, NULL);
		/* choose compression type */
		int compression;
		ssfilterif *compression_if;
		if (! si_branchis_root(b)) {
			compression    = index->scheme->compression_branch;
			compression_if = index->scheme->compression_branch_if;
		} else {
			compression    = index->scheme->compression;
			compression_if = index->scheme->compression_if;
		}
		sdreadarg arg = {
			.index           = &b->index,
			.buf             = &cbuf->a,
			.buf_xf          = &cbuf->b,
			.buf_read        = &c->d,
			.index_iter      = &cbuf->index_iter,
			.page_iter       = &cbuf->page_iter,
			.use_memory      = node->in_memory,
			.use_mmap        = use_mmap,
			.use_mmap_copy   = 0,
			.use_compression = compression,
			.compression_if  = compression_if,
			.has             = 0,
			.has_vlsn        = 0,
			.o               = SS_GTE,
			.memory          = &b->copy,
			.mmap            = map,
			.file            = &node->file,
			.r               = r
		};
		ss_iterinit(sd_read, &s->src);
		int rc = ss_iteropen(sd_read, &s->src, &arg, NULL, 0);
		if (ssunlikely(rc == -1))
			return sr_oom_malfunction(r->e);
		size_stream += sd_indextotal(&b->index);
		cbuf = cbuf->next;
		b = b->next;
	}
	ssiter i;
	ss_iterinit(sv_mergeiter, &i);
	ss_iteropen(sv_mergeiter, &i, r, &merge, SS_GTE);
	rc = si_merge(index, c, node, vlsn, vlsn_lru, &i, size_stream);
	sv_mergefree(&merge, r->a);
	return rc;
}

int si_compact_index(si *index, sdc *c, siplan *plan,
                     uint64_t vlsn,
                     uint64_t vlsn_lru)
{
	sinode *node = plan->node;

	si_lock(index);
	if (ssunlikely(node->used == 0)) {
		si_nodeunlock(node);
		si_unlock(index);
		return 0;
	}
	svindex *vindex;
	vindex = si_noderotate(node);
	si_unlock(index);

	uint64_t size_stream = sv_indexused(vindex);
	ssiter i;
	ss_iterinit(sv_indexiter, &i);
	ss_iteropen(sv_indexiter, &i, index->r, vindex, SS_GTE, NULL, 0);
	return si_compact(index, c, plan, vlsn, vlsn_lru, &i, size_stream);
}
예제 #4
0
static void
sd_read_gt0(void)
{
	sdbuild b;
	sd_buildinit(&b);
	t( sd_buildbegin(&b, &st_r.r, 1, 0, 0, 0, NULL) == 0);

	int key = 7;
	addv(&b, &st_r.r, 3, 0, &key);
	key = 8;
	addv(&b, &st_r.r, 4, 0, &key);
	key = 9;
	addv(&b, &st_r.r, 5, 0, &key);
	sd_buildend(&b, &st_r.r);

	sdindex index;
	sd_indexinit(&index);
	t( sd_indexbegin(&index, &st_r.r) == 0 );

	int rc;
	rc = sd_indexadd(&index, &st_r.r, &b, sizeof(sdseal));
	t( rc == 0 );

	sdid id;
	memset(&id, 0, sizeof(id));

	ssfile f;
	ss_fileinit(&f, &st_r.vfs);
	t( ss_filenew(&f, "./0000.db") == 0 );
	t( sd_writeseal(&st_r.r, &f, NULL) == 0 );
	t( sd_writepage(&st_r.r, &f, NULL, &b) == 0 );
	t( sd_indexcommit(&index, &st_r.r, &id, NULL, f.size) == 0 );
	t( sd_writeindex(&st_r.r, &f, NULL, &index) == 0 );
	t( sd_seal(&st_r.r, &f, NULL, &index, 0) == 0 );

	ssmmap map;
	t( ss_vfsmmap(&st_r.vfs, &map, f.fd, f.size, 1) == 0 );

	ssbuf buf;
	ss_bufinit(&buf);
	ssbuf xfbuf;
	ss_bufinit(&xfbuf);
	t( ss_bufensure(&xfbuf, &st_r.a, 1024) == 0 );

	ssiter index_iter;
	ssiter page_iter;

	sdreadarg arg = {
		.index           = &index,
		.buf             = &buf,
		.buf_xf          = &xfbuf,
		.buf_read        = NULL,
		.index_iter      = &index_iter,
		.page_iter       = &page_iter,
		.mmap            = &map,
		.memory          = NULL,
		.file            = NULL,
		.o               = SS_GT,
		.use_memory      = 0,
		.use_mmap        = 1,
		.use_mmap_copy   = 0,
		.use_compression = 0,
		.compression_if  = NULL,
		.has             = 0,
		.has_vlsn        = 0,
		.r               = &st_r.r
	};

	ssiter it;
	ss_iterinit(sd_read, &it);
	ss_iteropen(sd_read, &it, &arg, NULL, 0);
	t( ss_iteratorhas(&it) == 1 );

	sv *v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &st_r.r, 0, NULL) == 7);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &st_r.r, 0, NULL) == 8);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &st_r.r, 0, NULL) == 9);
	ss_iteratornext(&it);
	t( ss_iteratorhas(&it) == 0 );
	ss_iteratorclose(&it);

	ss_fileclose(&f);
	t( ss_vfsmunmap(&st_r.vfs, &map) == 0 );
	t( ss_vfsunlink(&st_r.vfs, "./0000.db") == 0 );

	sd_indexfree(&index, &st_r.r);
	sd_buildfree(&b, &st_r.r);
	ss_buffree(&xfbuf, &st_r.a);
	ss_buffree(&buf, &st_r.a);
}

static void
sd_read_gt1(void)
{
	ssfile f;
	ss_fileinit(&f, &st_r.vfs);
	t( ss_filenew(&f, "./0000.db") == 0 );
	t( sd_writeseal(&st_r.r, &f, NULL) == 0 );

	sdbuild b;
	sd_buildinit(&b);
	t( sd_buildbegin(&b, &st_r.r, 1, 0, 0, 0, NULL) == 0);

	int key = 7;
	addv(&b, &st_r.r, 3, 0, &key);
	key = 8;
	addv(&b, &st_r.r, 4, 0, &key);
	key = 9;
	addv(&b, &st_r.r, 5, 0, &key);
	sd_buildend(&b, &st_r.r);
	uint64_t poff = f.size;
	t( sd_writepage(&st_r.r, &f, NULL, &b) == 0 );

	sdindex index;
	sd_indexinit(&index);
	t( sd_indexbegin(&index, &st_r.r) == 0 );

	int rc;
	rc = sd_indexadd(&index, &st_r.r, &b, poff);
	t( rc == 0 );
	t( sd_buildcommit(&b, &st_r.r) == 0 );

	t( sd_buildbegin(&b, &st_r.r, 1, 0, 0, 0, NULL) == 0);
	key = 10;
	addv(&b, &st_r.r, 6, 0, &key);
	key = 11;
	addv(&b, &st_r.r, 7, 0, &key);
	key = 13;
	addv(&b, &st_r.r, 8, 0, &key);
	sd_buildend(&b, &st_r.r);
	poff = f.size;
	t( sd_writepage(&st_r.r, &f, NULL, &b) == 0 );

	rc = sd_indexadd(&index, &st_r.r, &b, poff);
	t( rc == 0 );
	t( sd_buildcommit(&b, &st_r.r) == 0 );

	t( sd_buildbegin(&b, &st_r.r, 1, 0, 0, 0, NULL) == 0);
	key = 15;
	addv(&b, &st_r.r, 9, 0, &key);
	key = 18;
	addv(&b, &st_r.r, 10, 0, &key);
	key = 20;
	addv(&b, &st_r.r, 11, 0, &key);
	sd_buildend(&b, &st_r.r);
	poff = f.size;
	t( sd_writepage(&st_r.r, &f, NULL, &b) == 0 );

	rc = sd_indexadd(&index, &st_r.r, &b, poff);
	t( rc == 0 );
	t( sd_buildcommit(&b, &st_r.r) == 0 );

	sdid id;
	memset(&id, 0, sizeof(id));

	t( sd_indexcommit(&index, &st_r.r, &id, NULL, f.size) == 0 );
	t( sd_writeindex(&st_r.r, &f, NULL, &index) == 0 );
	t( sd_seal(&st_r.r, &f, NULL, &index, 0) == 0 );

	ssmmap map;
	t( ss_vfsmmap(&st_r.vfs, &map, f.fd, f.size, 1) == 0 );

	ssbuf buf;
	ss_bufinit(&buf);
	ssbuf xfbuf;
	ss_bufinit(&xfbuf);
	t( ss_bufensure(&xfbuf, &st_r.a, 1024) == 0 );

	ssiter index_iter;
	ssiter page_iter;

	sdreadarg arg = {
		.index           = &index,
		.buf             = &buf,
		.buf_xf          = &xfbuf,
		.buf_read        = NULL,
		.index_iter      = &index_iter,
		.page_iter       = &page_iter,
		.mmap            = &map,
		.memory          = NULL,
		.file            = NULL,
		.o               = SS_GT,
		.use_memory      = 0,
		.use_mmap        = 1,
		.use_mmap_copy   = 0,
		.use_compression = 0,
		.compression_if  = NULL,
		.has             = 0,
		.has_vlsn        = 0,
		.r               = &st_r.r
	};

	ssiter it;
	ss_iterinit(sd_read, &it);
	ss_iteropen(sd_read, &it, &arg, NULL, 0);
	t( ss_iteratorhas(&it) == 1 );

	/* page 0 */
	t( ss_iteratorhas(&it) != 0 );
	sv *v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &st_r.r, 0, NULL) == 7);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &st_r.r, 0, NULL) == 8);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &st_r.r, 0, NULL) == 9);
	ss_iteratornext(&it);

	/* page 1 */
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &st_r.r, 0, NULL) == 10);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &st_r.r, 0, NULL) == 11);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &st_r.r, 0, NULL) == 13);
	ss_iteratornext(&it);

	/* page 2 */
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &st_r.r, 0, NULL) == 15);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &st_r.r, 0, NULL) == 18);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &st_r.r, 0, NULL) == 20);
	ss_iteratornext(&it);
	t( ss_iteratorhas(&it) == 0 );
	ss_iteratorclose(&it);

	ss_fileclose(&f);
	t( ss_vfsmunmap(&st_r.vfs, &map) == 0 );
	t( ss_vfsunlink(&st_r.vfs, "./0000.db") == 0 );

	sd_indexfree(&index, &st_r.r);
	sd_buildfree(&b, &st_r.r);
	ss_buffree(&xfbuf, &st_r.a);
	ss_buffree(&buf, &st_r.a);
}

static void
sd_read_gt0_compression_zstd(void)
{
	ssa a;
	ss_aopen(&a, &ss_stda);
	ssa aref;
	ss_aopen(&aref, &ss_stda);
	ssvfs vfs;
	ss_vfsinit(&vfs, &ss_stdvfs);
	sfscheme cmp;
	sf_schemeinit(&cmp);
	sffield *field = sf_fieldnew(&a, "key");
	t( sf_fieldoptions(field, &a, "u32,key(0)") == 0 );
	t( sf_schemeadd(&cmp, &a, field) == 0 );
	field = sf_fieldnew(&a, "value");
	t( sf_fieldoptions(field, &a, "string") == 0 );
	t( sf_schemeadd(&cmp, &a, field) == 0 );
	t( sf_schemevalidate(&cmp, &a) == 0 );
	ssinjection ij;
	memset(&ij, 0, sizeof(ij));
	srstat stat;
	memset(&stat, 0, sizeof(stat));
	srerror error;
	sr_errorinit(&error);
	srseq seq;
	sr_seqinit(&seq);
	sscrcf crc = ss_crc32c_function();
	sr r;
	sr_init(&r, NULL, &error, &a, &aref, &vfs, NULL, NULL, &seq, SF_RAW,
	        NULL, &cmp, &ij, &stat, crc);

	sdbuild b;
	sd_buildinit(&b);
	t( sd_buildbegin(&b, &r, 1, 0, 0, 1, &ss_zstdfilter) == 0);

	int key = 7;
	addv(&b, &r, 3, 0, &key);
	key = 8;
	addv(&b, &r, 4, 0, &key);
	key = 9;
	addv(&b, &r, 5, 0, &key);
	t( sd_buildend(&b, &r) == 0 );

	sdindex index;
	sd_indexinit(&index);
	t( sd_indexbegin(&index, &r) == 0 );

	int rc;
	rc = sd_indexadd(&index, &r, &b, sizeof(sdseal));
	t( rc == 0 );

	sdid id;
	memset(&id, 0, sizeof(id));

	ssfile f;
	ss_fileinit(&f, &vfs);
	t( ss_filenew(&f, "./0000.db") == 0 );
	t( sd_writeseal(&r, &f, NULL) == 0 );
	t( sd_writepage(&r, &f, NULL, &b) == 0 );
	t( sd_indexcommit(&index, &r, &id, NULL, f.size) == 0 );
	t( sd_writeindex(&r, &f, NULL, &index) == 0 );
	t( sd_seal(&r, &f, NULL, &index, 0) == 0 );

	t( sd_buildcommit(&b, &r) == 0 );

	ssmmap map;
	t( ss_vfsmmap(&st_r.vfs, &map, f.fd, f.size, 1) == 0 );

	ssbuf buf;
	ss_bufinit(&buf);
	ssbuf xfbuf;
	ss_bufinit(&xfbuf);
	t( ss_bufensure(&xfbuf, &a, 1024) == 0 );

	ssiter index_iter;
	ssiter page_iter;

	sdreadarg arg = {
		.index           = &index,
		.buf             = &buf,
		.buf_xf          = &xfbuf,
		.buf_read        = NULL,
		.index_iter      = &index_iter,
		.page_iter       = &page_iter,
		.mmap            = &map,
		.memory          = NULL,
		.file            = NULL,
		.o               = SS_GT,
		.use_memory      = 0,
		.use_mmap        = 1,
		.use_mmap_copy   = 0,
		.use_compression = 1,
		.compression_if  = &ss_zstdfilter,
		.has             = 0,
		.has_vlsn        = 0,
		.r               = &r
	};

	ssiter it;
	ss_iterinit(sd_read, &it);
	ss_iteropen(sd_read, &it, &arg, NULL, 0);
	t( ss_iteratorhas(&it) == 1 );

	sv *v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 7);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 8);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 9);
	ss_iteratornext(&it);
	t( ss_iteratorhas(&it) == 0 );
	ss_iteratorclose(&it);

	ss_fileclose(&f);
	t( ss_vfsmunmap(&st_r.vfs, &map) == 0 );
	t( ss_vfsunlink(&vfs, "./0000.db") == 0 );

	sd_indexfree(&index, &r);
	sd_buildfree(&b, &r);

	ss_buffree(&xfbuf, &a);
	ss_buffree(&buf, &a);
	sf_schemefree(&cmp, &a);
}

static void
sd_read_gt0_compression_lz4(void)
{
	ssa a;
	ss_aopen(&a, &ss_stda);
	ssa aref;
	ss_aopen(&aref, &ss_stda);
	ssvfs vfs;
	ss_vfsinit(&vfs, &ss_stdvfs);
	sfscheme cmp;
	sf_schemeinit(&cmp);
	sffield *field = sf_fieldnew(&a, "key");
	t( sf_fieldoptions(field, &a, "u32,key(0)") == 0 );
	t( sf_schemeadd(&cmp, &a, field) == 0 );
	field = sf_fieldnew(&a, "value");
	t( sf_fieldoptions(field, &a, "string") == 0 );
	t( sf_schemeadd(&cmp, &a, field) == 0 );
	t( sf_schemevalidate(&cmp, &a) == 0 );
	ssinjection ij;
	memset(&ij, 0, sizeof(ij));
	srstat stat;
	memset(&stat, 0, sizeof(stat));
	srerror error;
	sr_errorinit(&error);
	srseq seq;
	sr_seqinit(&seq);
	sscrcf crc = ss_crc32c_function();
	sr r;
	sr_init(&r, NULL, &error, &a, &aref, &vfs, NULL, NULL, &seq, SF_RAW,
	        NULL, &cmp, &ij, &stat, crc);

	sdbuild b;
	sd_buildinit(&b);
	t( sd_buildbegin(&b, &r, 1, 0, 0, 1, &ss_lz4filter) == 0);

	int key = 7;
	addv(&b, &r, 3, 0, &key);
	key = 8;
	addv(&b, &r, 4, 0, &key);
	key = 9;
	addv(&b, &r, 5, 0, &key);
	t( sd_buildend(&b, &r) == 0 );

	sdindex index;
	sd_indexinit(&index);
	t( sd_indexbegin(&index, &r) == 0 );

	int rc;
	rc = sd_indexadd(&index, &r, &b, sizeof(sdseal));
	t( rc == 0 );

	sdid id;
	memset(&id, 0, sizeof(id));

	t( sd_indexcommit(&index, &r, &id, NULL, 0) == 0 );

	ssfile f;
	ss_fileinit(&f, &vfs);
	t( ss_filenew(&f, "./0000.db") == 0 );
	t( sd_writeseal(&r, &f, NULL) == 0 );
	t( sd_writepage(&r, &f, NULL, &b) == 0 );
	t( sd_indexcommit(&index, &r, &id, NULL, f.size) == 0 );
	t( sd_writeindex(&r, &f, NULL, &index) == 0 );
	t( sd_seal(&r, &f, NULL, &index, 0) == 0 );

	ssmmap map;
	t( ss_vfsmmap(&st_r.vfs, &map, f.fd, f.size, 1) == 0 );

	t( sd_buildcommit(&b, &r) == 0 );

	ssbuf buf;
	ss_bufinit(&buf);
	ssbuf xfbuf;
	ss_bufinit(&xfbuf);
	t( ss_bufensure(&xfbuf, &a, 1024) == 0 );

	ssiter index_iter;
	ssiter page_iter;

	sdreadarg arg = {
		.index           = &index,
		.buf             = &buf,
		.buf_xf          = &xfbuf,
		.buf_read        = NULL,
		.index_iter      = &index_iter,
		.page_iter       = &page_iter,
		.mmap            = &map,
		.memory          = NULL,
		.file            = NULL,
		.o               = SS_GT,
		.use_memory      = 0,
		.use_mmap        = 1,
		.use_mmap_copy   = 0,
		.use_compression = 1,
		.compression_if  = &ss_lz4filter,
		.has             = 0,
		.has_vlsn        = 0,
		.r               = &r
	};

	ssiter it;
	ss_iterinit(sd_read, &it);
	ss_iteropen(sd_read, &it, &arg, NULL, 0);
	t( ss_iteratorhas(&it) == 1 );

	sv *v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 7);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 8);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 9);
	ss_iteratornext(&it);
	t( ss_iteratorhas(&it) == 0 );
	ss_iteratorclose(&it);

	ss_fileclose(&f);
	t( ss_vfsmunmap(&st_r.vfs, &map) == 0 );
	t( ss_vfsunlink(&vfs, "./0000.db") == 0 );

	sd_indexfree(&index, &r);
	sd_buildfree(&b, &r);

	ss_buffree(&xfbuf, &a);
	ss_buffree(&buf, &a);
	sf_schemefree(&cmp, &a);
}

static void
sd_read_gt1_compression_zstd(void)
{
	ssa a;
	ss_aopen(&a, &ss_stda);
	ssa aref;
	ss_aopen(&aref, &ss_stda);
	ssvfs vfs;
	ss_vfsinit(&vfs, &ss_stdvfs);
	sfscheme cmp;
	sf_schemeinit(&cmp);
	sffield *field = sf_fieldnew(&a, "key");
	t( sf_fieldoptions(field, &a, "u32,key(0)") == 0 );
	t( sf_schemeadd(&cmp, &a, field) == 0 );
	field = sf_fieldnew(&a, "value");
	t( sf_fieldoptions(field, &a, "string") == 0 );
	t( sf_schemeadd(&cmp, &a, field) == 0 );
	t( sf_schemevalidate(&cmp, &a) == 0 );
	ssinjection ij;
	memset(&ij, 0, sizeof(ij));
	srstat stat;
	memset(&stat, 0, sizeof(stat));
	srerror error;
	sr_errorinit(&error);
	srseq seq;
	sr_seqinit(&seq);
	sscrcf crc = ss_crc32c_function();
	sr r;
	sr_init(&r, NULL, &error, &a, &aref, &vfs, NULL, NULL, &seq, SF_RAW,
	        NULL, &cmp, &ij, &stat, crc);

	ssfile f;
	ss_fileinit(&f, &vfs);
	t( ss_filenew(&f, "./0000.db") == 0 );
	t( sd_writeseal(&r, &f, NULL) == 0 );

	sdbuild b;
	sd_buildinit(&b);
	t( sd_buildbegin(&b, &r, 1, 0, 0, 1, &ss_zstdfilter) == 0);

	int key = 7;
	addv(&b, &r, 3, 0, &key);
	key = 8;
	addv(&b, &r, 4, 0, &key);
	key = 9;
	addv(&b, &r, 5, 0, &key);
	sd_buildend(&b, &r);
	uint64_t poff = f.size;
	t( sd_writepage(&r, &f, NULL, &b) == 0 );

	sdindex index;
	sd_indexinit(&index);
	t( sd_indexbegin(&index, &r) == 0 );

	int rc;
	rc = sd_indexadd(&index, &r, &b, poff);
	t( rc == 0 );
	t( sd_buildcommit(&b, &r) == 0 );
	sd_buildreset(&b, &r);

	t( sd_buildbegin(&b, &r, 1, 0, 0, 1, &ss_zstdfilter) == 0);
	key = 10;
	addv(&b, &r, 6, 0, &key);
	key = 11;
	addv(&b, &r, 7, 0, &key);
	key = 13;
	addv(&b, &r, 8, 0, &key);
	sd_buildend(&b, &r);
	poff = f.size;
	t( sd_writepage(&r, &f, NULL, &b) == 0 );

	rc = sd_indexadd(&index, &r, &b, poff);
	t( rc == 0 );
	t( sd_buildcommit(&b, &r) == 0 );
	sd_buildreset(&b, &r);

	t( sd_buildbegin(&b, &r, 1, 0, 0, 1, &ss_zstdfilter) == 0);
	key = 15;
	addv(&b, &r, 9, 0, &key);
	key = 18;
	addv(&b, &r, 10, 0, &key);
	key = 20;
	addv(&b, &r, 11, 0, &key);
	sd_buildend(&b, &r);
	poff = f.size;
	t( sd_writepage(&r, &f, NULL, &b) == 0 );

	rc = sd_indexadd(&index, &r, &b, poff);
	t( rc == 0 );
	t( sd_buildcommit(&b, &r) == 0 );

	sdid id;
	memset(&id, 0, sizeof(id));
	t( sd_indexcommit(&index, &r, &id, NULL, f.size) == 0 );

	t( sd_writeindex(&r, &f, NULL, &index) == 0 );
	t( sd_seal(&r, &f, NULL, &index, 0) == 0 );

	ssmmap map;
	t( ss_vfsmmap(&st_r.vfs, &map, f.fd, f.size, 1) == 0 );

	ssbuf buf;
	ss_bufinit(&buf);
	ssbuf xfbuf;
	ss_bufinit(&xfbuf);
	t( ss_bufensure(&xfbuf, &a, 1024) == 0 );

	ssiter index_iter;
	ssiter page_iter;

	sdreadarg arg = {
		.index           = &index,
		.buf             = &buf,
		.buf_xf          = &xfbuf,
		.buf_read        = NULL,
		.index_iter      = &index_iter,
		.page_iter       = &page_iter,
		.mmap            = &map,
		.memory          = NULL,
		.file            = NULL,
		.o               = SS_GT,
		.use_memory      = 0,
		.use_mmap        = 1,
		.use_mmap_copy   = 0,
		.use_compression = 1,
		.compression_if  = &ss_zstdfilter,
		.has             = 0,
		.has_vlsn        = 0,
		.r               = &r
	};

	ssiter it;
	ss_iterinit(sd_read, &it);
	ss_iteropen(sd_read, &it, &arg, NULL, 0);
	t( ss_iteratorhas(&it) == 1 );

	/* page 0 */
	t( ss_iteratorhas(&it) != 0 );
	sv *v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 7);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 8);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 9);
	ss_iteratornext(&it);

	/* page 1 */
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 10);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 11);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 13);
	ss_iteratornext(&it);

	/* page 2 */
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 15);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 18);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 20);
	ss_iteratornext(&it);
	t( ss_iteratorhas(&it) == 0 );
	ss_iteratorclose(&it);

	ss_fileclose(&f);
	t( ss_vfsmunmap(&st_r.vfs, &map) == 0 );
	t( ss_vfsunlink(&vfs, "./0000.db") == 0 );

	sd_indexfree(&index, &r);
	sd_buildfree(&b, &r);
	ss_buffree(&buf, &a);
	ss_buffree(&xfbuf, &a);
	sf_schemefree(&cmp, &a);
}

static void
sd_read_gt1_compression_lz4(void)
{
	ssa a;
	ss_aopen(&a, &ss_stda);
	ssa aref;
	ss_aopen(&aref, &ss_stda);
	ssvfs vfs;
	ss_vfsinit(&vfs, &ss_stdvfs);
	sfscheme cmp;
	sf_schemeinit(&cmp);
	sffield *field = sf_fieldnew(&a, "key");
	t( sf_fieldoptions(field, &a, "u32,key(0)") == 0 );
	t( sf_schemeadd(&cmp, &a, field) == 0 );
	field = sf_fieldnew(&a, "value");
	t( sf_fieldoptions(field, &a, "string") == 0 );
	t( sf_schemeadd(&cmp, &a, field) == 0 );
	t( sf_schemevalidate(&cmp, &a) == 0 );
	ssinjection ij;
	memset(&ij, 0, sizeof(ij));
	srstat stat;
	memset(&stat, 0, sizeof(stat));
	srerror error;
	sr_errorinit(&error);
	srseq seq;
	sr_seqinit(&seq);
	sscrcf crc = ss_crc32c_function();
	sr r;
	sr_init(&r, NULL, &error, &a, &aref, &vfs, NULL, NULL, &seq, SF_RAW,
	        NULL, &cmp, &ij, &stat, crc);

	ssfile f;
	ss_fileinit(&f, &vfs);
	t( ss_filenew(&f, "./0000.db") == 0 );
	t( sd_writeseal(&r, &f, NULL) == 0 );

	sdbuild b;
	sd_buildinit(&b);
	t( sd_buildbegin(&b, &r, 1, 0, 0, 1, &ss_lz4filter) == 0);

	int key = 7;
	addv(&b, &r, 3, 0, &key);
	key = 8;
	addv(&b, &r, 4, 0, &key);
	key = 9;
	addv(&b, &r, 5, 0, &key);
	sd_buildend(&b, &r);
	uint64_t poff = f.size;
	t( sd_writepage(&r, &f, NULL, &b) == 0 );

	sdindex index;
	sd_indexinit(&index);
	t( sd_indexbegin(&index, &r) == 0 );

	int rc;
	rc = sd_indexadd(&index, &r, &b, poff);
	t( rc == 0 );
	t( sd_buildcommit(&b, &r) == 0 );
	sd_buildreset(&b, &r);

	t( sd_buildbegin(&b, &r, 1, 0, 0, 1, &ss_lz4filter) == 0);
	key = 10;
	addv(&b, &r, 6, 0, &key);
	key = 11;
	addv(&b, &r, 7, 0, &key);
	key = 13;
	addv(&b, &r, 8, 0, &key);
	sd_buildend(&b, &r);
	poff = f.size;
	t( sd_writepage(&r, &f, NULL, &b) == 0 );

	rc = sd_indexadd(&index, &r, &b, poff);
	t( rc == 0 );
	t( sd_buildcommit(&b, &r) == 0 );
	sd_buildreset(&b, &r);

	t( sd_buildbegin(&b, &r, 1, 0, 0, 1, &ss_lz4filter) == 0);
	key = 15;
	addv(&b, &r, 9, 0, &key);
	key = 18;
	addv(&b, &r, 10, 0, &key);
	key = 20;
	addv(&b, &r, 11, 0, &key);
	sd_buildend(&b, &r);
	poff = f.size;
	t( sd_writepage(&r, &f, NULL, &b) == 0 );

	rc = sd_indexadd(&index, &r, &b, poff);
	t( rc == 0 );
	t( sd_buildcommit(&b, &r) == 0 );

	sdid id;
	memset(&id, 0, sizeof(id));
	t( sd_indexcommit(&index, &r, &id, NULL, f.size) == 0 );

	t( sd_writeindex(&r, &f, NULL, &index) == 0 );
	t( sd_seal(&r, &f, NULL, &index, 0) == 0 );

	ssmmap map;
	t( ss_vfsmmap(&st_r.vfs, &map, f.fd, f.size, 1) == 0 );

	ssbuf buf;
	ss_bufinit(&buf);
	ssbuf xfbuf;
	ss_bufinit(&xfbuf);
	t( ss_bufensure(&xfbuf, &a, 1024) == 0 );

	ssiter index_iter;
	ssiter page_iter;

	sdreadarg arg = {
		.index           = &index,
		.buf             = &buf,
		.buf_xf          = &xfbuf,
		.buf_read        = NULL,
		.index_iter      = &index_iter,
		.page_iter       = &page_iter,
		.mmap            = &map,
		.memory          = NULL,
		.file            = NULL,
		.o               = SS_GT,
		.use_memory      = 0,
		.use_mmap        = 1,
		.use_mmap_copy   = 0,
		.use_compression = 1,
		.compression_if  = &ss_lz4filter,
		.has             = 0,
		.has_vlsn        = 0,
		.r               = &r
	};

	ssiter it;
	ss_iterinit(sd_read, &it);
	ss_iteropen(sd_read, &it, &arg, NULL, 0);
	t( ss_iteratorhas(&it) == 1 );

	/* page 0 */
	t( ss_iteratorhas(&it) != 0 );
	sv *v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 7);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 8);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 9);
	ss_iteratornext(&it);

	/* page 1 */
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 10);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 11);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 13);
	ss_iteratornext(&it);

	/* page 2 */
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 15);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 18);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 20);
	ss_iteratornext(&it);
	t( ss_iteratorhas(&it) == 0 );
	ss_iteratorclose(&it);

	ss_fileclose(&f);
	t( ss_vfsmunmap(&st_r.vfs, &map) == 0 );
	t( ss_vfsunlink(&vfs, "./0000.db") == 0 );

	sd_indexfree(&index, &r);
	sd_buildfree(&b, &r);
	ss_buffree(&buf, &a);
	ss_buffree(&xfbuf, &a);
	sf_schemefree(&cmp, &a);
}

stgroup *sd_read_group(void)
{
	stgroup *group = st_group("sdread");
	st_groupadd(group, st_test("gt0", sd_read_gt0));
	st_groupadd(group, st_test("gt1", sd_read_gt1));
	st_groupadd(group, st_test("gt0_compression_zstd", sd_read_gt0_compression_zstd));
	st_groupadd(group, st_test("gt0_compression_lz4", sd_read_gt0_compression_lz4));
	st_groupadd(group, st_test("gt1_compression_zstd", sd_read_gt1_compression_zstd));
	st_groupadd(group, st_test("gt1_compression_lz4", sd_read_gt1_compression_lz4));
	return group;
}