Beispiel #1
0
int si_nodefree(sinode *n, sr *r, int gc)
{
	int rcret = 0;
	int rc;
	if (gc && ss_pathis_set(&n->file.path)) {
		ss_fileadvise(&n->file, 0, 0, n->file.size);
		rc = ss_vfsunlink(r->vfs, ss_pathof(&n->file.path));
		if (ssunlikely(rc == -1)) {
			sr_malfunction(r->e, "db file '%s' unlink error: %s",
			               ss_pathof(&n->file.path),
			               strerror(errno));
			rcret = -1;
		}
	}
	si_nodefree_branches(n, r);
	rc = si_nodeclose(n, r, gc);
	if (ssunlikely(rc == -1))
		rcret = -1;
	ss_free(r->a, n);
	return rcret;
}
Beispiel #2
0
static inline int
si_recoversnapshot(si *i, sr *r, sdsnapshot *s)
{
	/* recovery stages:

	   snapshot            (1) ok
	   snapshot.incomplete (2) remove snapshot.incomplete
	   snapshot            (3) remove snapshot.incomplete, load snapshot
	   snapshot.incomplete
	*/

	/* recover snapshot file (crash recover) */
	int snapshot = 0;
	int snapshot_incomplete = 0;

	char path[1024];
	snprintf(path, sizeof(path), "%s/index", i->scheme->path);
	snapshot = ss_vfsexists(r->vfs, path);
	snprintf(path, sizeof(path), "%s/index.incomplete", i->scheme->path);
	snapshot_incomplete = ss_vfsexists(r->vfs, path);

	int rc;
	if (snapshot_incomplete) {
		rc = ss_vfsunlink(r->vfs, path);
		if (ssunlikely(rc == -1)) {
			sr_malfunction(r->e, "index file '%s' unlink error: %s",
			               path, strerror(errno));
			return -1;
		}
	}
	if (! snapshot)
		return 0;

	/* read snapshot file */
	snprintf(path, sizeof(path), "%s/index", i->scheme->path);

	ssize_t size = ss_vfssize(r->vfs, path);
	if (ssunlikely(size == -1)) {
		sr_malfunction(r->e, "index file '%s' read error: %s",
		               path, strerror(errno));
		return -1;
	}
	rc = ss_bufensure(&s->buf, r->a, size);
	if (ssunlikely(rc == -1))
		return sr_oom_malfunction(r->e);
	ssfile file;
	ss_fileinit(&file, r->vfs);
	rc = ss_fileopen(&file, path);
	if (ssunlikely(rc == -1)) {
		sr_malfunction(r->e, "index file '%s' open error: %s",
		               path, strerror(errno));
		return -1;
	}
	rc = ss_filepread(&file, 0, s->buf.s, size);
	if (ssunlikely(rc == -1)) {
		sr_malfunction(r->e, "index file '%s' read error: %s",
		               path, strerror(errno));
		ss_fileclose(&file);
		return -1;
	}
	ss_bufadvance(&s->buf, size);
	ss_fileclose(&file);
	return 0;
}
Beispiel #3
0
int si_snapshot(si *index, siplan *plan)
{
	sr *r = index->r;

	ssfile file;
	ss_fileinit(&file, r->vfs);

	/* prepare to take snapshot */
	sdsnapshot snapshot;
	sd_snapshot_init(&snapshot);
	int rc = ss_bufensure(&snapshot.buf, r->a, 1 * 1024 * 1024);
	if (ssunlikely(rc == -1))
		goto error_oom;
	rc = sd_snapshot_begin(&snapshot, r);
	if (ssunlikely(rc == -1))
		goto error_oom;

	/* save node index image */
	si_lock(index);
	ssrbnode *p = NULL;
	while ((p = ss_rbnext(&index->i, p)))
	{
		sinode *n = sscast(p, sinode, node);
		rc = sd_snapshot_add(&snapshot, r, n->self.id.id,
		                     n->file.size,
		                     n->branch_count,
		                     n->temperature_reads);
		if (ssunlikely(rc == -1)) {
			si_unlock(index);
			goto error_oom;
		}
		sibranch *b = &n->self;
		while (b) {
			rc = sd_snapshot_addbranch(&snapshot, r, b->index.h);
			if (ssunlikely(rc == -1)) {
				si_unlock(index);
				goto error_oom;
			}
			b = b->link;
		}
	}
	sd_snapshot_commit(&snapshot, r,
	                   index->lru_v,
	                   index->lru_steps,
	                   index->lru_intr_lsn,
	                   index->lru_intr_sum,
	                   index->read_disk,
	                   index->read_cache);
	si_unlock(index);

	/* create snapshot.inprogress */
	char path[PATH_MAX];
	snprintf(path, sizeof(path), "%s/index.incomplete",
	         index->scheme->path);
	rc = ss_filenew(&file, path);
	if (ssunlikely(rc == -1)) {
		sr_malfunction(r->e, "index file '%s' create error: %s",
		               path, strerror(errno));
		goto error;
	}
	rc = ss_filewrite(&file, snapshot.buf.s, ss_bufused(&snapshot.buf));
	if (ssunlikely(rc == -1)) {
		sr_malfunction(r->e, "index file '%s' write error: %s",
		               path, strerror(errno));
		goto error;
	}

	SS_INJECTION(r->i, SS_INJECTION_SI_SNAPSHOT_0,
	             ss_fileclose(&file);
	             sd_snapshot_free(&snapshot, r);
				 sr_malfunction(r->e, "%s", "error injection");
				 return -1);

	/* sync snapshot file */
	if (index->scheme->sync) {
		rc = ss_filesync(&file);
		if (ssunlikely(rc == -1)) {
			sr_malfunction(r->e, "index file '%s' sync error: %s",
			               path, strerror(errno));
			goto error;
		}
	}

	/* remove old snapshot file (if exists) */
	snprintf(path, sizeof(path), "%s/index", index->scheme->path);
	ss_vfsunlink(r->vfs, path);

	SS_INJECTION(r->i, SS_INJECTION_SI_SNAPSHOT_1,
	             ss_fileclose(&file);
	             sd_snapshot_free(&snapshot, r);
				 sr_malfunction(r->e, "%s", "error injection");
				 return -1);

	/* rename snapshot.incomplete to snapshot */
	rc = ss_filerename(&file, path);
	if (ssunlikely(rc == -1)) {
		sr_malfunction(r->e, "index file '%s' rename error: %s",
		               ss_pathof(&file.path),
		               strerror(errno));
		goto error;
	}

	SS_INJECTION(r->i, SS_INJECTION_SI_SNAPSHOT_2,
	             ss_fileclose(&file);
	             sd_snapshot_free(&snapshot, r);
				 sr_malfunction(r->e, "%s", "error injection");
				 return -1);

	/* close snapshot file */
	rc = ss_fileclose(&file);
	if (ssunlikely(rc == -1)) {
		sr_malfunction(r->e, "index file '%s' close error: %s",
		               path, strerror(errno));
		goto error;
	}

	sd_snapshot_free(&snapshot, r);

	/* finish index snapshot */
	si_lock(index);
	index->snapshot = plan->a;
	index->snapshot_run = 0;
	si_unlock(index);
	return 0;

error_oom:
	sr_oom(r->e);
error:
	ss_fileclose(&file);
	sd_snapshot_free(&snapshot, r);
	return -1;
}
Beispiel #4
0
static inline int
si_trackdir(sitrack *track, sr *r, si *i)
{
	DIR *dir = opendir(i->scheme->path);
	if (ssunlikely(dir == NULL)) {
		sr_malfunction(r->e, "directory '%s' open error: %s",
		               i->scheme->path, strerror(errno));
		return -1;
	}
	struct dirent *de;
	while ((de = readdir(dir))) {
		if (ssunlikely(de->d_name[0] == '.'))
			continue;
		uint64_t id_parent = 0;
		uint64_t id = 0;
		int rc = si_process(de->d_name, &id, &id_parent);
		if (ssunlikely(rc == -1))
			continue; /* skip unknown file */
		si_tracknsn(track, id_parent);
		si_tracknsn(track, id);

		sinode *head, *node;
		sspath path;
		switch (rc) {
		case SI_RDB_DBI:
		case SI_RDB_DBSEAL: {
			/* find parent node and mark it as having
			 * incomplete compaction process */
			head = si_trackget(track, id_parent);
			if (sslikely(head == NULL)) {
				head = si_nodenew(r);
				if (ssunlikely(head == NULL))
					goto error;
				head->self.id.id = id_parent;
				head->recover = SI_RDB_UNDEF;
				si_trackset(track, head);
			}
			head->recover |= rc;
			/* remove any incomplete file made during compaction */
			if (rc == SI_RDB_DBI) {
				ss_pathcompound(&path, i->scheme->path, id_parent, id,
				                ".db.incomplete");
				rc = ss_vfsunlink(r->vfs, path.path);
				if (ssunlikely(rc == -1)) {
					sr_malfunction(r->e, "db file '%s' unlink error: %s",
					               path.path, strerror(errno));
					goto error;
				}
				continue;
			}
			assert(rc == SI_RDB_DBSEAL);
			/* recover 'sealed' node */
			node = si_nodenew(r);
			if (ssunlikely(node == NULL))
				goto error;
			node->recover = SI_RDB_DBSEAL;
			ss_pathcompound(&path, i->scheme->path, id_parent, id,
			                ".db.seal");
			rc = si_nodeopen(node, r, i->scheme, &path, NULL);
			if (ssunlikely(rc == -1)) {
				si_nodefree(node, r, 0);
				goto error;
			}
			si_trackset(track, node);
			si_trackmetrics(track, node);
			continue;
		}
		}
		assert(rc == SI_RDB);

		head = si_trackget(track, id);
		if (head != NULL && (head->recover & SI_RDB)) {
			/* loaded by snapshot */
			continue;
		}

		/* recover node */
		node = si_nodenew(r);
		if (ssunlikely(node == NULL))
			goto error;
		node->recover = SI_RDB;
		ss_path(&path, i->scheme->path, id, ".db");
		rc = si_nodeopen(node, r, i->scheme, &path, NULL);
		if (ssunlikely(rc == -1)) {
			si_nodefree(node, r, 0);
			goto error;
		}
		si_trackmetrics(track, node);

		/* track node */
		if (sslikely(head == NULL)) {
			si_trackset(track, node);
		} else {
			/* replace a node previously created by a
			 * incomplete compaction */
			si_trackreplace(track, head, node);
			head->recover &= ~SI_RDB_UNDEF;
			node->recover |= head->recover;
			si_nodefree(head, r, 0);
		}
	}
	closedir(dir);
	return 0;
error:
	closedir(dir);
	return -1;
}
Beispiel #5
0
static void
sd_read_gt0(void)
{
	sdbuild b;
	sd_buildinit(&b);
	t( sd_buildbegin(&b, &st_r.r, 1, 0, 0, 0, NULL) == 0);

	int key = 7;
	addv(&b, &st_r.r, 3, 0, &key);
	key = 8;
	addv(&b, &st_r.r, 4, 0, &key);
	key = 9;
	addv(&b, &st_r.r, 5, 0, &key);
	sd_buildend(&b, &st_r.r);

	sdindex index;
	sd_indexinit(&index);
	t( sd_indexbegin(&index, &st_r.r) == 0 );

	int rc;
	rc = sd_indexadd(&index, &st_r.r, &b, sizeof(sdseal));
	t( rc == 0 );

	sdid id;
	memset(&id, 0, sizeof(id));

	ssfile f;
	ss_fileinit(&f, &st_r.vfs);
	t( ss_filenew(&f, "./0000.db") == 0 );
	t( sd_writeseal(&st_r.r, &f, NULL) == 0 );
	t( sd_writepage(&st_r.r, &f, NULL, &b) == 0 );
	t( sd_indexcommit(&index, &st_r.r, &id, NULL, f.size) == 0 );
	t( sd_writeindex(&st_r.r, &f, NULL, &index) == 0 );
	t( sd_seal(&st_r.r, &f, NULL, &index, 0) == 0 );

	ssmmap map;
	t( ss_vfsmmap(&st_r.vfs, &map, f.fd, f.size, 1) == 0 );

	ssbuf buf;
	ss_bufinit(&buf);
	ssbuf xfbuf;
	ss_bufinit(&xfbuf);
	t( ss_bufensure(&xfbuf, &st_r.a, 1024) == 0 );

	ssiter index_iter;
	ssiter page_iter;

	sdreadarg arg = {
		.index           = &index,
		.buf             = &buf,
		.buf_xf          = &xfbuf,
		.buf_read        = NULL,
		.index_iter      = &index_iter,
		.page_iter       = &page_iter,
		.mmap            = &map,
		.memory          = NULL,
		.file            = NULL,
		.o               = SS_GT,
		.use_memory      = 0,
		.use_mmap        = 1,
		.use_mmap_copy   = 0,
		.use_compression = 0,
		.compression_if  = NULL,
		.has             = 0,
		.has_vlsn        = 0,
		.r               = &st_r.r
	};

	ssiter it;
	ss_iterinit(sd_read, &it);
	ss_iteropen(sd_read, &it, &arg, NULL, 0);
	t( ss_iteratorhas(&it) == 1 );

	sv *v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &st_r.r, 0, NULL) == 7);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &st_r.r, 0, NULL) == 8);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &st_r.r, 0, NULL) == 9);
	ss_iteratornext(&it);
	t( ss_iteratorhas(&it) == 0 );
	ss_iteratorclose(&it);

	ss_fileclose(&f);
	t( ss_vfsmunmap(&st_r.vfs, &map) == 0 );
	t( ss_vfsunlink(&st_r.vfs, "./0000.db") == 0 );

	sd_indexfree(&index, &st_r.r);
	sd_buildfree(&b, &st_r.r);
	ss_buffree(&xfbuf, &st_r.a);
	ss_buffree(&buf, &st_r.a);
}

static void
sd_read_gt1(void)
{
	ssfile f;
	ss_fileinit(&f, &st_r.vfs);
	t( ss_filenew(&f, "./0000.db") == 0 );
	t( sd_writeseal(&st_r.r, &f, NULL) == 0 );

	sdbuild b;
	sd_buildinit(&b);
	t( sd_buildbegin(&b, &st_r.r, 1, 0, 0, 0, NULL) == 0);

	int key = 7;
	addv(&b, &st_r.r, 3, 0, &key);
	key = 8;
	addv(&b, &st_r.r, 4, 0, &key);
	key = 9;
	addv(&b, &st_r.r, 5, 0, &key);
	sd_buildend(&b, &st_r.r);
	uint64_t poff = f.size;
	t( sd_writepage(&st_r.r, &f, NULL, &b) == 0 );

	sdindex index;
	sd_indexinit(&index);
	t( sd_indexbegin(&index, &st_r.r) == 0 );

	int rc;
	rc = sd_indexadd(&index, &st_r.r, &b, poff);
	t( rc == 0 );
	t( sd_buildcommit(&b, &st_r.r) == 0 );

	t( sd_buildbegin(&b, &st_r.r, 1, 0, 0, 0, NULL) == 0);
	key = 10;
	addv(&b, &st_r.r, 6, 0, &key);
	key = 11;
	addv(&b, &st_r.r, 7, 0, &key);
	key = 13;
	addv(&b, &st_r.r, 8, 0, &key);
	sd_buildend(&b, &st_r.r);
	poff = f.size;
	t( sd_writepage(&st_r.r, &f, NULL, &b) == 0 );

	rc = sd_indexadd(&index, &st_r.r, &b, poff);
	t( rc == 0 );
	t( sd_buildcommit(&b, &st_r.r) == 0 );

	t( sd_buildbegin(&b, &st_r.r, 1, 0, 0, 0, NULL) == 0);
	key = 15;
	addv(&b, &st_r.r, 9, 0, &key);
	key = 18;
	addv(&b, &st_r.r, 10, 0, &key);
	key = 20;
	addv(&b, &st_r.r, 11, 0, &key);
	sd_buildend(&b, &st_r.r);
	poff = f.size;
	t( sd_writepage(&st_r.r, &f, NULL, &b) == 0 );

	rc = sd_indexadd(&index, &st_r.r, &b, poff);
	t( rc == 0 );
	t( sd_buildcommit(&b, &st_r.r) == 0 );

	sdid id;
	memset(&id, 0, sizeof(id));

	t( sd_indexcommit(&index, &st_r.r, &id, NULL, f.size) == 0 );
	t( sd_writeindex(&st_r.r, &f, NULL, &index) == 0 );
	t( sd_seal(&st_r.r, &f, NULL, &index, 0) == 0 );

	ssmmap map;
	t( ss_vfsmmap(&st_r.vfs, &map, f.fd, f.size, 1) == 0 );

	ssbuf buf;
	ss_bufinit(&buf);
	ssbuf xfbuf;
	ss_bufinit(&xfbuf);
	t( ss_bufensure(&xfbuf, &st_r.a, 1024) == 0 );

	ssiter index_iter;
	ssiter page_iter;

	sdreadarg arg = {
		.index           = &index,
		.buf             = &buf,
		.buf_xf          = &xfbuf,
		.buf_read        = NULL,
		.index_iter      = &index_iter,
		.page_iter       = &page_iter,
		.mmap            = &map,
		.memory          = NULL,
		.file            = NULL,
		.o               = SS_GT,
		.use_memory      = 0,
		.use_mmap        = 1,
		.use_mmap_copy   = 0,
		.use_compression = 0,
		.compression_if  = NULL,
		.has             = 0,
		.has_vlsn        = 0,
		.r               = &st_r.r
	};

	ssiter it;
	ss_iterinit(sd_read, &it);
	ss_iteropen(sd_read, &it, &arg, NULL, 0);
	t( ss_iteratorhas(&it) == 1 );

	/* page 0 */
	t( ss_iteratorhas(&it) != 0 );
	sv *v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &st_r.r, 0, NULL) == 7);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &st_r.r, 0, NULL) == 8);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &st_r.r, 0, NULL) == 9);
	ss_iteratornext(&it);

	/* page 1 */
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &st_r.r, 0, NULL) == 10);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &st_r.r, 0, NULL) == 11);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &st_r.r, 0, NULL) == 13);
	ss_iteratornext(&it);

	/* page 2 */
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &st_r.r, 0, NULL) == 15);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &st_r.r, 0, NULL) == 18);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &st_r.r, 0, NULL) == 20);
	ss_iteratornext(&it);
	t( ss_iteratorhas(&it) == 0 );
	ss_iteratorclose(&it);

	ss_fileclose(&f);
	t( ss_vfsmunmap(&st_r.vfs, &map) == 0 );
	t( ss_vfsunlink(&st_r.vfs, "./0000.db") == 0 );

	sd_indexfree(&index, &st_r.r);
	sd_buildfree(&b, &st_r.r);
	ss_buffree(&xfbuf, &st_r.a);
	ss_buffree(&buf, &st_r.a);
}

static void
sd_read_gt0_compression_zstd(void)
{
	ssa a;
	ss_aopen(&a, &ss_stda);
	ssa aref;
	ss_aopen(&aref, &ss_stda);
	ssvfs vfs;
	ss_vfsinit(&vfs, &ss_stdvfs);
	sfscheme cmp;
	sf_schemeinit(&cmp);
	sffield *field = sf_fieldnew(&a, "key");
	t( sf_fieldoptions(field, &a, "u32,key(0)") == 0 );
	t( sf_schemeadd(&cmp, &a, field) == 0 );
	field = sf_fieldnew(&a, "value");
	t( sf_fieldoptions(field, &a, "string") == 0 );
	t( sf_schemeadd(&cmp, &a, field) == 0 );
	t( sf_schemevalidate(&cmp, &a) == 0 );
	ssinjection ij;
	memset(&ij, 0, sizeof(ij));
	srstat stat;
	memset(&stat, 0, sizeof(stat));
	srerror error;
	sr_errorinit(&error);
	srseq seq;
	sr_seqinit(&seq);
	sscrcf crc = ss_crc32c_function();
	sr r;
	sr_init(&r, NULL, &error, &a, &aref, &vfs, NULL, NULL, &seq, SF_RAW,
	        NULL, &cmp, &ij, &stat, crc);

	sdbuild b;
	sd_buildinit(&b);
	t( sd_buildbegin(&b, &r, 1, 0, 0, 1, &ss_zstdfilter) == 0);

	int key = 7;
	addv(&b, &r, 3, 0, &key);
	key = 8;
	addv(&b, &r, 4, 0, &key);
	key = 9;
	addv(&b, &r, 5, 0, &key);
	t( sd_buildend(&b, &r) == 0 );

	sdindex index;
	sd_indexinit(&index);
	t( sd_indexbegin(&index, &r) == 0 );

	int rc;
	rc = sd_indexadd(&index, &r, &b, sizeof(sdseal));
	t( rc == 0 );

	sdid id;
	memset(&id, 0, sizeof(id));

	ssfile f;
	ss_fileinit(&f, &vfs);
	t( ss_filenew(&f, "./0000.db") == 0 );
	t( sd_writeseal(&r, &f, NULL) == 0 );
	t( sd_writepage(&r, &f, NULL, &b) == 0 );
	t( sd_indexcommit(&index, &r, &id, NULL, f.size) == 0 );
	t( sd_writeindex(&r, &f, NULL, &index) == 0 );
	t( sd_seal(&r, &f, NULL, &index, 0) == 0 );

	t( sd_buildcommit(&b, &r) == 0 );

	ssmmap map;
	t( ss_vfsmmap(&st_r.vfs, &map, f.fd, f.size, 1) == 0 );

	ssbuf buf;
	ss_bufinit(&buf);
	ssbuf xfbuf;
	ss_bufinit(&xfbuf);
	t( ss_bufensure(&xfbuf, &a, 1024) == 0 );

	ssiter index_iter;
	ssiter page_iter;

	sdreadarg arg = {
		.index           = &index,
		.buf             = &buf,
		.buf_xf          = &xfbuf,
		.buf_read        = NULL,
		.index_iter      = &index_iter,
		.page_iter       = &page_iter,
		.mmap            = &map,
		.memory          = NULL,
		.file            = NULL,
		.o               = SS_GT,
		.use_memory      = 0,
		.use_mmap        = 1,
		.use_mmap_copy   = 0,
		.use_compression = 1,
		.compression_if  = &ss_zstdfilter,
		.has             = 0,
		.has_vlsn        = 0,
		.r               = &r
	};

	ssiter it;
	ss_iterinit(sd_read, &it);
	ss_iteropen(sd_read, &it, &arg, NULL, 0);
	t( ss_iteratorhas(&it) == 1 );

	sv *v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 7);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 8);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 9);
	ss_iteratornext(&it);
	t( ss_iteratorhas(&it) == 0 );
	ss_iteratorclose(&it);

	ss_fileclose(&f);
	t( ss_vfsmunmap(&st_r.vfs, &map) == 0 );
	t( ss_vfsunlink(&vfs, "./0000.db") == 0 );

	sd_indexfree(&index, &r);
	sd_buildfree(&b, &r);

	ss_buffree(&xfbuf, &a);
	ss_buffree(&buf, &a);
	sf_schemefree(&cmp, &a);
}

static void
sd_read_gt0_compression_lz4(void)
{
	ssa a;
	ss_aopen(&a, &ss_stda);
	ssa aref;
	ss_aopen(&aref, &ss_stda);
	ssvfs vfs;
	ss_vfsinit(&vfs, &ss_stdvfs);
	sfscheme cmp;
	sf_schemeinit(&cmp);
	sffield *field = sf_fieldnew(&a, "key");
	t( sf_fieldoptions(field, &a, "u32,key(0)") == 0 );
	t( sf_schemeadd(&cmp, &a, field) == 0 );
	field = sf_fieldnew(&a, "value");
	t( sf_fieldoptions(field, &a, "string") == 0 );
	t( sf_schemeadd(&cmp, &a, field) == 0 );
	t( sf_schemevalidate(&cmp, &a) == 0 );
	ssinjection ij;
	memset(&ij, 0, sizeof(ij));
	srstat stat;
	memset(&stat, 0, sizeof(stat));
	srerror error;
	sr_errorinit(&error);
	srseq seq;
	sr_seqinit(&seq);
	sscrcf crc = ss_crc32c_function();
	sr r;
	sr_init(&r, NULL, &error, &a, &aref, &vfs, NULL, NULL, &seq, SF_RAW,
	        NULL, &cmp, &ij, &stat, crc);

	sdbuild b;
	sd_buildinit(&b);
	t( sd_buildbegin(&b, &r, 1, 0, 0, 1, &ss_lz4filter) == 0);

	int key = 7;
	addv(&b, &r, 3, 0, &key);
	key = 8;
	addv(&b, &r, 4, 0, &key);
	key = 9;
	addv(&b, &r, 5, 0, &key);
	t( sd_buildend(&b, &r) == 0 );

	sdindex index;
	sd_indexinit(&index);
	t( sd_indexbegin(&index, &r) == 0 );

	int rc;
	rc = sd_indexadd(&index, &r, &b, sizeof(sdseal));
	t( rc == 0 );

	sdid id;
	memset(&id, 0, sizeof(id));

	t( sd_indexcommit(&index, &r, &id, NULL, 0) == 0 );

	ssfile f;
	ss_fileinit(&f, &vfs);
	t( ss_filenew(&f, "./0000.db") == 0 );
	t( sd_writeseal(&r, &f, NULL) == 0 );
	t( sd_writepage(&r, &f, NULL, &b) == 0 );
	t( sd_indexcommit(&index, &r, &id, NULL, f.size) == 0 );
	t( sd_writeindex(&r, &f, NULL, &index) == 0 );
	t( sd_seal(&r, &f, NULL, &index, 0) == 0 );

	ssmmap map;
	t( ss_vfsmmap(&st_r.vfs, &map, f.fd, f.size, 1) == 0 );

	t( sd_buildcommit(&b, &r) == 0 );

	ssbuf buf;
	ss_bufinit(&buf);
	ssbuf xfbuf;
	ss_bufinit(&xfbuf);
	t( ss_bufensure(&xfbuf, &a, 1024) == 0 );

	ssiter index_iter;
	ssiter page_iter;

	sdreadarg arg = {
		.index           = &index,
		.buf             = &buf,
		.buf_xf          = &xfbuf,
		.buf_read        = NULL,
		.index_iter      = &index_iter,
		.page_iter       = &page_iter,
		.mmap            = &map,
		.memory          = NULL,
		.file            = NULL,
		.o               = SS_GT,
		.use_memory      = 0,
		.use_mmap        = 1,
		.use_mmap_copy   = 0,
		.use_compression = 1,
		.compression_if  = &ss_lz4filter,
		.has             = 0,
		.has_vlsn        = 0,
		.r               = &r
	};

	ssiter it;
	ss_iterinit(sd_read, &it);
	ss_iteropen(sd_read, &it, &arg, NULL, 0);
	t( ss_iteratorhas(&it) == 1 );

	sv *v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 7);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 8);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 9);
	ss_iteratornext(&it);
	t( ss_iteratorhas(&it) == 0 );
	ss_iteratorclose(&it);

	ss_fileclose(&f);
	t( ss_vfsmunmap(&st_r.vfs, &map) == 0 );
	t( ss_vfsunlink(&vfs, "./0000.db") == 0 );

	sd_indexfree(&index, &r);
	sd_buildfree(&b, &r);

	ss_buffree(&xfbuf, &a);
	ss_buffree(&buf, &a);
	sf_schemefree(&cmp, &a);
}

static void
sd_read_gt1_compression_zstd(void)
{
	ssa a;
	ss_aopen(&a, &ss_stda);
	ssa aref;
	ss_aopen(&aref, &ss_stda);
	ssvfs vfs;
	ss_vfsinit(&vfs, &ss_stdvfs);
	sfscheme cmp;
	sf_schemeinit(&cmp);
	sffield *field = sf_fieldnew(&a, "key");
	t( sf_fieldoptions(field, &a, "u32,key(0)") == 0 );
	t( sf_schemeadd(&cmp, &a, field) == 0 );
	field = sf_fieldnew(&a, "value");
	t( sf_fieldoptions(field, &a, "string") == 0 );
	t( sf_schemeadd(&cmp, &a, field) == 0 );
	t( sf_schemevalidate(&cmp, &a) == 0 );
	ssinjection ij;
	memset(&ij, 0, sizeof(ij));
	srstat stat;
	memset(&stat, 0, sizeof(stat));
	srerror error;
	sr_errorinit(&error);
	srseq seq;
	sr_seqinit(&seq);
	sscrcf crc = ss_crc32c_function();
	sr r;
	sr_init(&r, NULL, &error, &a, &aref, &vfs, NULL, NULL, &seq, SF_RAW,
	        NULL, &cmp, &ij, &stat, crc);

	ssfile f;
	ss_fileinit(&f, &vfs);
	t( ss_filenew(&f, "./0000.db") == 0 );
	t( sd_writeseal(&r, &f, NULL) == 0 );

	sdbuild b;
	sd_buildinit(&b);
	t( sd_buildbegin(&b, &r, 1, 0, 0, 1, &ss_zstdfilter) == 0);

	int key = 7;
	addv(&b, &r, 3, 0, &key);
	key = 8;
	addv(&b, &r, 4, 0, &key);
	key = 9;
	addv(&b, &r, 5, 0, &key);
	sd_buildend(&b, &r);
	uint64_t poff = f.size;
	t( sd_writepage(&r, &f, NULL, &b) == 0 );

	sdindex index;
	sd_indexinit(&index);
	t( sd_indexbegin(&index, &r) == 0 );

	int rc;
	rc = sd_indexadd(&index, &r, &b, poff);
	t( rc == 0 );
	t( sd_buildcommit(&b, &r) == 0 );
	sd_buildreset(&b, &r);

	t( sd_buildbegin(&b, &r, 1, 0, 0, 1, &ss_zstdfilter) == 0);
	key = 10;
	addv(&b, &r, 6, 0, &key);
	key = 11;
	addv(&b, &r, 7, 0, &key);
	key = 13;
	addv(&b, &r, 8, 0, &key);
	sd_buildend(&b, &r);
	poff = f.size;
	t( sd_writepage(&r, &f, NULL, &b) == 0 );

	rc = sd_indexadd(&index, &r, &b, poff);
	t( rc == 0 );
	t( sd_buildcommit(&b, &r) == 0 );
	sd_buildreset(&b, &r);

	t( sd_buildbegin(&b, &r, 1, 0, 0, 1, &ss_zstdfilter) == 0);
	key = 15;
	addv(&b, &r, 9, 0, &key);
	key = 18;
	addv(&b, &r, 10, 0, &key);
	key = 20;
	addv(&b, &r, 11, 0, &key);
	sd_buildend(&b, &r);
	poff = f.size;
	t( sd_writepage(&r, &f, NULL, &b) == 0 );

	rc = sd_indexadd(&index, &r, &b, poff);
	t( rc == 0 );
	t( sd_buildcommit(&b, &r) == 0 );

	sdid id;
	memset(&id, 0, sizeof(id));
	t( sd_indexcommit(&index, &r, &id, NULL, f.size) == 0 );

	t( sd_writeindex(&r, &f, NULL, &index) == 0 );
	t( sd_seal(&r, &f, NULL, &index, 0) == 0 );

	ssmmap map;
	t( ss_vfsmmap(&st_r.vfs, &map, f.fd, f.size, 1) == 0 );

	ssbuf buf;
	ss_bufinit(&buf);
	ssbuf xfbuf;
	ss_bufinit(&xfbuf);
	t( ss_bufensure(&xfbuf, &a, 1024) == 0 );

	ssiter index_iter;
	ssiter page_iter;

	sdreadarg arg = {
		.index           = &index,
		.buf             = &buf,
		.buf_xf          = &xfbuf,
		.buf_read        = NULL,
		.index_iter      = &index_iter,
		.page_iter       = &page_iter,
		.mmap            = &map,
		.memory          = NULL,
		.file            = NULL,
		.o               = SS_GT,
		.use_memory      = 0,
		.use_mmap        = 1,
		.use_mmap_copy   = 0,
		.use_compression = 1,
		.compression_if  = &ss_zstdfilter,
		.has             = 0,
		.has_vlsn        = 0,
		.r               = &r
	};

	ssiter it;
	ss_iterinit(sd_read, &it);
	ss_iteropen(sd_read, &it, &arg, NULL, 0);
	t( ss_iteratorhas(&it) == 1 );

	/* page 0 */
	t( ss_iteratorhas(&it) != 0 );
	sv *v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 7);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 8);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 9);
	ss_iteratornext(&it);

	/* page 1 */
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 10);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 11);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 13);
	ss_iteratornext(&it);

	/* page 2 */
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 15);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 18);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 20);
	ss_iteratornext(&it);
	t( ss_iteratorhas(&it) == 0 );
	ss_iteratorclose(&it);

	ss_fileclose(&f);
	t( ss_vfsmunmap(&st_r.vfs, &map) == 0 );
	t( ss_vfsunlink(&vfs, "./0000.db") == 0 );

	sd_indexfree(&index, &r);
	sd_buildfree(&b, &r);
	ss_buffree(&buf, &a);
	ss_buffree(&xfbuf, &a);
	sf_schemefree(&cmp, &a);
}

static void
sd_read_gt1_compression_lz4(void)
{
	ssa a;
	ss_aopen(&a, &ss_stda);
	ssa aref;
	ss_aopen(&aref, &ss_stda);
	ssvfs vfs;
	ss_vfsinit(&vfs, &ss_stdvfs);
	sfscheme cmp;
	sf_schemeinit(&cmp);
	sffield *field = sf_fieldnew(&a, "key");
	t( sf_fieldoptions(field, &a, "u32,key(0)") == 0 );
	t( sf_schemeadd(&cmp, &a, field) == 0 );
	field = sf_fieldnew(&a, "value");
	t( sf_fieldoptions(field, &a, "string") == 0 );
	t( sf_schemeadd(&cmp, &a, field) == 0 );
	t( sf_schemevalidate(&cmp, &a) == 0 );
	ssinjection ij;
	memset(&ij, 0, sizeof(ij));
	srstat stat;
	memset(&stat, 0, sizeof(stat));
	srerror error;
	sr_errorinit(&error);
	srseq seq;
	sr_seqinit(&seq);
	sscrcf crc = ss_crc32c_function();
	sr r;
	sr_init(&r, NULL, &error, &a, &aref, &vfs, NULL, NULL, &seq, SF_RAW,
	        NULL, &cmp, &ij, &stat, crc);

	ssfile f;
	ss_fileinit(&f, &vfs);
	t( ss_filenew(&f, "./0000.db") == 0 );
	t( sd_writeseal(&r, &f, NULL) == 0 );

	sdbuild b;
	sd_buildinit(&b);
	t( sd_buildbegin(&b, &r, 1, 0, 0, 1, &ss_lz4filter) == 0);

	int key = 7;
	addv(&b, &r, 3, 0, &key);
	key = 8;
	addv(&b, &r, 4, 0, &key);
	key = 9;
	addv(&b, &r, 5, 0, &key);
	sd_buildend(&b, &r);
	uint64_t poff = f.size;
	t( sd_writepage(&r, &f, NULL, &b) == 0 );

	sdindex index;
	sd_indexinit(&index);
	t( sd_indexbegin(&index, &r) == 0 );

	int rc;
	rc = sd_indexadd(&index, &r, &b, poff);
	t( rc == 0 );
	t( sd_buildcommit(&b, &r) == 0 );
	sd_buildreset(&b, &r);

	t( sd_buildbegin(&b, &r, 1, 0, 0, 1, &ss_lz4filter) == 0);
	key = 10;
	addv(&b, &r, 6, 0, &key);
	key = 11;
	addv(&b, &r, 7, 0, &key);
	key = 13;
	addv(&b, &r, 8, 0, &key);
	sd_buildend(&b, &r);
	poff = f.size;
	t( sd_writepage(&r, &f, NULL, &b) == 0 );

	rc = sd_indexadd(&index, &r, &b, poff);
	t( rc == 0 );
	t( sd_buildcommit(&b, &r) == 0 );
	sd_buildreset(&b, &r);

	t( sd_buildbegin(&b, &r, 1, 0, 0, 1, &ss_lz4filter) == 0);
	key = 15;
	addv(&b, &r, 9, 0, &key);
	key = 18;
	addv(&b, &r, 10, 0, &key);
	key = 20;
	addv(&b, &r, 11, 0, &key);
	sd_buildend(&b, &r);
	poff = f.size;
	t( sd_writepage(&r, &f, NULL, &b) == 0 );

	rc = sd_indexadd(&index, &r, &b, poff);
	t( rc == 0 );
	t( sd_buildcommit(&b, &r) == 0 );

	sdid id;
	memset(&id, 0, sizeof(id));
	t( sd_indexcommit(&index, &r, &id, NULL, f.size) == 0 );

	t( sd_writeindex(&r, &f, NULL, &index) == 0 );
	t( sd_seal(&r, &f, NULL, &index, 0) == 0 );

	ssmmap map;
	t( ss_vfsmmap(&st_r.vfs, &map, f.fd, f.size, 1) == 0 );

	ssbuf buf;
	ss_bufinit(&buf);
	ssbuf xfbuf;
	ss_bufinit(&xfbuf);
	t( ss_bufensure(&xfbuf, &a, 1024) == 0 );

	ssiter index_iter;
	ssiter page_iter;

	sdreadarg arg = {
		.index           = &index,
		.buf             = &buf,
		.buf_xf          = &xfbuf,
		.buf_read        = NULL,
		.index_iter      = &index_iter,
		.page_iter       = &page_iter,
		.mmap            = &map,
		.memory          = NULL,
		.file            = NULL,
		.o               = SS_GT,
		.use_memory      = 0,
		.use_mmap        = 1,
		.use_mmap_copy   = 0,
		.use_compression = 1,
		.compression_if  = &ss_lz4filter,
		.has             = 0,
		.has_vlsn        = 0,
		.r               = &r
	};

	ssiter it;
	ss_iterinit(sd_read, &it);
	ss_iteropen(sd_read, &it, &arg, NULL, 0);
	t( ss_iteratorhas(&it) == 1 );

	/* page 0 */
	t( ss_iteratorhas(&it) != 0 );
	sv *v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 7);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 8);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 9);
	ss_iteratornext(&it);

	/* page 1 */
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 10);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 11);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 13);
	ss_iteratornext(&it);

	/* page 2 */
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 15);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 18);
	ss_iteratornext(&it);
	v = ss_iteratorof(&it);
	t( *(int*)sv_field(v, &r, 0, NULL) == 20);
	ss_iteratornext(&it);
	t( ss_iteratorhas(&it) == 0 );
	ss_iteratorclose(&it);

	ss_fileclose(&f);
	t( ss_vfsmunmap(&st_r.vfs, &map) == 0 );
	t( ss_vfsunlink(&vfs, "./0000.db") == 0 );

	sd_indexfree(&index, &r);
	sd_buildfree(&b, &r);
	ss_buffree(&buf, &a);
	ss_buffree(&xfbuf, &a);
	sf_schemefree(&cmp, &a);
}

stgroup *sd_read_group(void)
{
	stgroup *group = st_group("sdread");
	st_groupadd(group, st_test("gt0", sd_read_gt0));
	st_groupadd(group, st_test("gt1", sd_read_gt1));
	st_groupadd(group, st_test("gt0_compression_zstd", sd_read_gt0_compression_zstd));
	st_groupadd(group, st_test("gt0_compression_lz4", sd_read_gt0_compression_lz4));
	st_groupadd(group, st_test("gt1_compression_zstd", sd_read_gt1_compression_zstd));
	st_groupadd(group, st_test("gt1_compression_lz4", sd_read_gt1_compression_lz4));
	return group;
}