Example #1
0
/*
 * Finish the index under construction and serialize it into i->i.
 *
 * Appended to i->i, in this order:
 *   1. the min/max key pairs accumulated in i->v,
 *   2. the AMQF extension header and table (only when qf is non-NULL),
 *   3. zero padding (only when align is non-zero),
 *   4. the index header kept in i->build (crc computed last).
 * i->v is released once its content has been copied.
 *
 * Returns 0 on success, or the value of sr_oom() when i->i cannot grow.
 */
int sd_indexcommit(sdindex *i, sr *r, sdid *id, ssqf *qf,
                   uint32_t align,
                   uint64_t offset)
{
	int pairs_size = ss_bufused(&i->v);
	int ext_flags = 0;
	int ext_size = 0;
	if (qf != NULL) {
		ext_flags = SD_INDEXEXT_AMQF;
		ext_size += sizeof(sdindexamqf);
		ext_size += qf->qf_table_size;
	}

	/* calculate index align for direct_io; when align is set the
	 * padding is always in the range 1..align */
	int total = pairs_size + ext_size + sizeof(sdindexheader);
	int padding = 0;
	if (align != 0) {
		padding = align - ((offset + total + ss_bufused(&i->i)) % align);
		total += padding;
	}

	if (ssunlikely(ss_bufensure(&i->i, r->a, total) == -1))
		return sr_oom(r->e);

	/* min/max pairs */
	memcpy(i->i.p, i->v.s, pairs_size);
	ss_bufadvance(&i->i, pairs_size);

	/* extension */
	if (qf != NULL) {
		sdindexamqf *amqf = (sdindexamqf*)(i->i.p);
		amqf->q       = qf->qf_qbits;
		amqf->r       = qf->qf_rbits;
		amqf->entries = qf->qf_entries;
		amqf->size    = qf->qf_table_size;
		ss_bufadvance(&i->i, sizeof(sdindexamqf));
		memcpy(i->i.p, qf->qf_table, qf->qf_table_size);
		ss_bufadvance(&i->i, qf->qf_table_size);
	}
	ss_buffree(&i->v, r->a);

	sdindexheader *hdr = &i->build;

	/* align */
	if (padding != 0) {
		hdr->align = padding;
		memset(i->i.p, 0, padding);
		ss_bufadvance(&i->i, padding);
	}

	/* header: the crc is taken after every other field is final */
	hdr->offset     = offset;
	hdr->id         = *id;
	hdr->extension  = ext_size;
	hdr->extensions = ext_flags;
	hdr->crc = ss_crcs(r->crc, hdr, sizeof(sdindexheader), 0);
	memcpy(i->i.p, hdr, sizeof(sdindexheader));
	ss_bufadvance(&i->i, sizeof(sdindexheader));

	i->h = sd_indexheader(i);
	return 0;
}
Example #2
0
/*
 * Scan directory `dir` and collect one swdirid record per distinct file id
 * into `list`.
 *
 * Entries whose name starts with '.' are skipped, as are entries whose
 * extension is not recognised by sw_dirtypeof(). Files sharing an id are
 * merged into one record by OR-ing the type masks; type->count is bumped
 * for every accepted file.
 *
 * Returns the number of records (sorted with sw_dircmp), 0 when nothing was
 * collected, or -1 on opendir failure, an unparsable name, or out of memory.
 */
int sw_dirread(ssbuf *list, ssa *a, swdirtype *types, char *dir)
{
	DIR *handle = opendir(dir);
	if (ssunlikely(handle == NULL))
		return -1;

	struct dirent *entry;
	for (entry = readdir(handle); entry != NULL; entry = readdir(handle)) {
		char *name = entry->d_name;
		if (ssunlikely(name[0] == '.'))
			continue;

		ssize_t id = sw_diridof(name);
		if (ssunlikely(id == -1))
			goto error;

		/* the extension starts right after the first dot */
		char *dot = strchr(name, '.');
		if (ssunlikely(dot == NULL))
			goto error;
		swdirtype *type = sw_dirtypeof(types, dot + 1);
		if (ssunlikely(type == NULL))
			continue;

		swdirid *item = sw_dirmatch(list, id);
		if (item == NULL) {
			/* first file seen for this id: append a new record */
			if (ssunlikely(ss_bufensure(list, a, sizeof(swdirid)) == -1))
				goto error;
			item = (swdirid*)list->p;
			ss_bufadvance(list, sizeof(swdirid));
			item->id   = id;
			item->mask = type->mask;
		} else {
			item->mask |= type->mask;
		}
		type->count++;
	}
	closedir(handle);

	int used = ss_bufused(list);
	if (ssunlikely(used == 0))
		return 0;

	int count = used / sizeof(swdirid);
	qsort(list->s, count, sizeof(swdirid), sw_dircmp);
	return count;

error:
	closedir(handle);
	return -1;
}
Example #3
0
/*
 * Finish the index under construction.
 *
 * Appends the min/max key pairs from i->v and, when qf is non-NULL, the
 * AMQF extension header and table to i->i; releases i->v; then fills in
 * the header returned by sd_indexheader() (offset, id, extension info)
 * and computes its crc last.
 *
 * Returns 0 on success, or the value of sr_oom() when i->i cannot grow.
 */
int sd_indexcommit(sdindex *i, sr *r, sdid *id, ssqf *qf, uint64_t offset)
{
	int pairs_size = ss_bufused(&i->v);
	int ext_flags = 0;
	int ext_size = 0;
	if (qf != NULL) {
		ext_flags = SD_INDEXEXT_AMQF;
		ext_size += sizeof(sdindexamqf);
		ext_size += qf->qf_table_size;
	}

	if (ssunlikely(ss_bufensure(&i->i, r->a, pairs_size + ext_size) == -1))
		return sr_oom(r->e);

	/* min/max pairs */
	memcpy(i->i.p, i->v.s, pairs_size);
	ss_bufadvance(&i->i, pairs_size);

	/* extension */
	if (qf != NULL) {
		sdindexamqf *amqf = (sdindexamqf*)(i->i.p);
		amqf->q       = qf->qf_qbits;
		amqf->r       = qf->qf_rbits;
		amqf->entries = qf->qf_entries;
		amqf->size    = qf->qf_table_size;
		ss_bufadvance(&i->i, sizeof(sdindexamqf));
		memcpy(i->i.p, qf->qf_table, qf->qf_table_size);
		ss_bufadvance(&i->i, qf->qf_table_size);
	}
	ss_buffree(&i->v, r->a);

	/* header: the crc is taken after every other field is final */
	sdindexheader *hdr = sd_indexheader(i);
	i->h = hdr;
	hdr->offset     = offset;
	hdr->id         = *id;
	hdr->extension  = ext_size;
	hdr->extensions = ext_flags;
	hdr->crc = ss_crcs(r->crc, hdr, sizeof(sdindexheader), 0);
	return 0;
}
Example #4
0
/*
 * Move every version stored in the in-memory index of `node` back into
 * `index` through si_redistribute_set().
 *
 * The version pointers are first staged in c->b, then replayed one by one
 * with a single timestamp taken from ss_utime().
 *
 * Returns 0 on success (including the empty case), or the value of
 * sr_oom_malfunction() when staging runs out of memory.
 */
static int
si_redistribute_index(si *index, sr *r, sdc *c, sinode *node)
{
	ssiter it;
	svindex *source = si_nodeindex(node);

	/* stage: collect version pointers into c->b */
	ss_iterinit(sv_indexiter, &it);
	ss_iteropen(sv_indexiter, &it, r, source, SS_GTE, NULL, 0);
	for (;;) {
		if (! ss_iterhas(sv_indexiter, &it))
			break;
		sv *cur = ss_iterof(sv_indexiter, &it);
		if (ssunlikely(ss_bufadd(&c->b, r->a, &cur->v, sizeof(svv**)) == -1))
			return sr_oom_malfunction(r->e);
		ss_iternext(sv_indexiter, &it);
	}
	if (ssunlikely(ss_bufused(&c->b) == 0))
		return 0;

	/* replay: hand every staged version to the index */
	uint64_t now = ss_utime();
	ss_iterinit(ss_bufiterref, &it);
	ss_iteropen(ss_bufiterref, &it, &c->b, sizeof(svv*));
	for (;;) {
		if (! ss_iterhas(ss_bufiterref, &it))
			break;
		svv *staged = ss_iterof(ss_bufiterref, &it);
		si_redistribute_set(index, r, now, staged);
		ss_iternext(ss_bufiterref, &it);
	}
	return 0;
}
Example #5
0
/*
 * Distribute the versions held in the in-memory index of `node` among the
 * nodes listed in `result` (an array of sinode pointers).
 *
 * The version pointers are first staged in c->b. Then two cursors advance
 * in lock-step: `i` over the staged versions and `j` over the result nodes.
 * A version is inserted into prev->i0 as long as it compares lower than the
 * minimum key of the node following prev; otherwise the node cursor moves
 * forward. Once the node list is exhausted, every remaining version goes
 * into the last node.
 *
 * Returns 0 on success (including the empty case), or the value of
 * sr_oom_malfunction() when staging runs out of memory.
 * `index` is unused.
 */
static int
si_redistribute(si *index, sr *r, sdc *c, sinode *node, ssbuf *result)
{
	(void)index;
	svindex *vindex = si_nodeindex(node);
	ssiter i;
	/* stage every version pointer of the node index into c->b */
	ss_iterinit(sv_indexiter, &i);
	ss_iteropen(sv_indexiter, &i, r, vindex, SS_GTE, NULL, 0);
	while (ss_iterhas(sv_indexiter, &i))
	{
		sv *v = ss_iterof(sv_indexiter, &i);
		int rc = ss_bufadd(&c->b, r->a, &v->v, sizeof(svv**));
		if (ssunlikely(rc == -1))
			return sr_oom_malfunction(r->e);
		ss_iternext(sv_indexiter, &i);
	}
	if (ssunlikely(ss_bufused(&c->b) == 0))
		return 0;
	/* i: staged versions, j: destination nodes */
	ss_iterinit(ss_bufiterref, &i);
	ss_iteropen(ss_bufiterref, &i, &c->b, sizeof(svv*));
	ssiter j;
	ss_iterinit(ss_bufiterref, &j);
	ss_iteropen(ss_bufiterref, &j, result, sizeof(sinode*));
	/* prev is the node currently receiving versions; p is its successor */
	sinode *prev = ss_iterof(ss_bufiterref, &j);
	ss_iternext(ss_bufiterref, &j);
	while (1)
	{
		sinode *p = ss_iterof(ss_bufiterref, &j);
		if (p == NULL) {
			/* no successor: the last node takes everything that is left */
			assert(prev != NULL);
			while (ss_iterhas(ss_bufiterref, &i)) {
				svv *v = ss_iterof(ss_bufiterref, &i);
				v->next = NULL;
				sv_indexset(&prev->i0, r, v);
				ss_iternext(ss_bufiterref, &i);
			}
			break;
		}
		while (ss_iterhas(ss_bufiterref, &i))
		{
			svv *v = ss_iterof(ss_bufiterref, &i);
			v->next = NULL;
			/* compare the version key with the minimum key of the successor */
			sdindexpage *page = sd_indexmin(&p->self.index);
			int rc = sr_compare(r->scheme, sv_vpointer(v), v->size,
			                    sd_indexpage_min(&p->self.index, page),
			                    page->sizemin);
			/* version belongs to the successor (or later): stop filling prev */
			if (ssunlikely(rc >= 0))
				break;
			sv_indexset(&prev->i0, r, v);
			ss_iternext(ss_bufiterref, &i);
		}
		if (ssunlikely(! ss_iterhas(ss_bufiterref, &i)))
			break;
		prev = p;
		ss_iternext(ss_bufiterref, &j);
	}
	/* every staged version must have been consumed */
	assert(ss_iterof(ss_bufiterref, &i) == NULL);
	return 0;
}
Example #6
0
/*
 * Linear search of `list` (a packed array of swdirid records) for the
 * record whose id equals `id`.
 *
 * Returns a pointer to the matching record, or NULL when the list is empty
 * or holds no such id.
 */
static inline swdirid*
sw_dirmatch(ssbuf *list, uint64_t id)
{
	if (ssunlikely(ss_bufused(list) == 0))
		return NULL;

	swdirid *cur;
	for (cur = (swdirid*)list->s; (char*)cur < list->p; cur++) {
		if (cur->id == id)
			return cur;
	}
	return NULL;
}
Example #7
0
/*
 * Register the page currently held by `build` in the index `i`.
 *
 * A new sdindexpage record is written at the tail of i->i describing the
 * page (file offset, sizes, lsn range); for a non-empty page its min/max
 * keys are stored through sd_indexadd_raw(). The running totals in
 * i->build are then updated.
 *
 * Returns 0 on success, the value of sr_oom() when i->i cannot grow, or -1
 * when sd_indexadd_raw() fails.
 */
int sd_indexadd(sdindex *i, sr *r, sdbuild *build, uint64_t offset)
{
	if (ssunlikely(ss_bufensure(&i->i, r->a, sizeof(sdindexpage)) == -1))
		return sr_oom(r->e);

	sdpageheader *page = sd_buildheader(build);
	int disk_size   = page->size + sizeof(sdpageheader);
	int origin_size = page->sizeorigin + sizeof(sdpageheader);

	/* prepare page record; it is committed by the final ss_bufadvance */
	sdindexpage *entry = (sdindexpage*)i->i.p;
	entry->offset      = offset;
	entry->offsetindex = ss_bufused(&i->v);
	entry->lsnmin      = page->lsnmin;
	entry->lsnmax      = page->lsnmax;
	entry->size        = disk_size;
	entry->sizeorigin  = origin_size;
	entry->sizemin     = 0;
	entry->sizemax     = 0;

	/* copy keys */
	if (ssunlikely(page->count > 0)) {
		char *key_min = sd_buildmin(build, r);
		char *key_max = sd_buildmax(build, r);
		if (ssunlikely(sd_indexadd_raw(i, r, entry, key_min, key_max) == -1))
			return -1;
	}

	/* update index info */
	sdindexheader *hdr = &i->build;
	hdr->count++;
	hdr->size        += sizeof(sdindexpage) + entry->sizemin + entry->sizemax;
	hdr->keys        += page->count;
	hdr->total       += disk_size;
	hdr->totalorigin += origin_size;
	hdr->dupkeys     += page->countdup;

	/* widen the tracked extremes */
	if (build->vmax > hdr->sizevmax)
		hdr->sizevmax = build->vmax;
	if (page->lsnmin < hdr->lsnmin)
		hdr->lsnmin = page->lsnmin;
	if (page->lsnmax > hdr->lsnmax)
		hdr->lsnmax = page->lsnmax;
	if (page->tsmin < hdr->tsmin)
		hdr->tsmin = page->tsmin;
	if (page->lsnmindup < hdr->dupmin)
		hdr->dupmin = page->lsnmindup;

	ss_bufadvance(&i->i, sizeof(sdindexpage));
	return 0;
}
Example #8
0
/*
 * Close the page currently being built in `b`.
 *
 * Records the final sizes of the meta (m), value (v) and key (k) regions,
 * computes the data crc over the uncompressed regions when b->crc is set,
 * optionally compresses the page, and finalizes the page header (sizes and
 * header crc). With compression enabled the finished header is also copied
 * to the start of the compressed region.
 *
 * Returns 0 on success, -1 when sd_buildcompress() fails.
 */
int sd_buildend(sdbuild *b, sr *r)
{
	sdbuildref *ref = sd_buildref(b);
	sdpageheader *hdr;
	uint32_t data_crc = 0;

	/* update sizes */
	ref->msize = ss_bufused(&b->m) - ref->m;
	ref->vsize = ss_bufused(&b->v) - ref->v;
	ref->ksize = ss_bufused(&b->k) - ref->k;
	ref->csize = 0;

	/* calculate data crc (non-compressed) */
	hdr = sd_buildheader(b);
	if (sslikely(b->crc)) {
		data_crc = ss_crcp(r->crc, b->m.s + ref->m, ref->msize, 0);
		data_crc = ss_crcp(r->crc, b->v.s + ref->v, ref->vsize, data_crc);
		data_crc = ss_crcp(r->crc, b->k.s + ref->k, ref->ksize, data_crc);
	}
	hdr->crcdata = data_crc;

	/* compression */
	if (b->compress) {
		if (ssunlikely(sd_buildcompress(b, r) == -1))
			return -1;
		ref->csize = ss_bufused(&b->c) - ref->c;
	}

	/* update page header */
	int total = ref->msize + ref->vsize + ref->ksize;
	hdr->sizekeys   = ref->ksize;
	hdr->sizeorigin = total - sizeof(sdpageheader);
	if (b->compress)
		hdr->size = ref->csize - sizeof(sdpageheader);
	else
		hdr->size = hdr->sizeorigin;

	/* the header crc must be taken after all other fields are final */
	hdr->crc = ss_crcs(r->crc, hdr, sizeof(sdpageheader), 0);
	if (b->compress)
		memcpy(b->c.s + ref->c, hdr, sizeof(sdpageheader));
	return 0;
}
Example #9
0
/*
 * Start building a new page in `b`.
 *
 * Stores the crc/compress/compress_dup settings, lazily creates the
 * duplicate-key tracker when compress_dup is requested, appends a new
 * sdbuildref recording the current end of each region (m, v, k, c), and
 * reserves a zeroed page header at the start of the meta region with
 * lsnmin and lsnmindup preset to UINT64_MAX.
 *
 * Returns 0 on success, or the value of sr_oom() on allocation failure.
 */
int sd_buildbegin(sdbuild *b, sr *r, int crc, int compress, int compress_dup)
{
	b->crc          = crc;
	b->compress     = compress;
	b->compress_dup = compress_dup;

	/* the tracker is created once and reused by later pages */
	if (compress_dup && b->tracker.size == 0) {
		if (ssunlikely(ss_htinit(&b->tracker, r->a, 32768) == -1))
			return sr_oom(r->e);
	}

	/* new page reference: remember where each region currently ends */
	if (ssunlikely(ss_bufensure(&b->list, r->a, sizeof(sdbuildref)) == -1))
		return sr_oom(r->e);
	sdbuildref *page_ref =
		(sdbuildref*)ss_bufat(&b->list, sizeof(sdbuildref), b->n);
	page_ref->m     = ss_bufused(&b->m);
	page_ref->v     = ss_bufused(&b->v);
	page_ref->k     = ss_bufused(&b->k);
	page_ref->c     = ss_bufused(&b->c);
	page_ref->msize = 0;
	page_ref->vsize = 0;
	page_ref->ksize = 0;
	page_ref->csize = 0;

	/* reserve and reset the page header */
	if (ssunlikely(ss_bufensure(&b->m, r->a, sizeof(sdpageheader)) == -1))
		return sr_oom(r->e);
	sdpageheader *hdr = sd_buildheader(b);
	memset(hdr, 0, sizeof(*hdr));
	hdr->lsnmin    = UINT64_MAX;
	hdr->lsnmindup = UINT64_MAX;
	memset(hdr->reserve, 0, sizeof(hdr->reserve));

	/* commit both reservations */
	ss_bufadvance(&b->list, sizeof(sdbuildref));
	ss_bufadvance(&b->m, sizeof(sdpageheader));
	return 0;
}
/*
 * Round-trip test for the lz4 filter: compress a fixed text with an
 * SS_FINPUT filter, decompress the result with an SS_FOUTPUT filter and
 * check that the output equals the original text.
 */
static void
ss_lz4filter_compress_decompress(void)
{
	char text[] =
	"The Early English Text Society is a text publication society dedicated to the"
	"reprinting of early English texts, especially those only available in manuscript."
	"Most of its volumes are in Middle English and Old English. It was founded in "
	"England in 1864 by Frederick James Furnivall. Its stated goal in a report of the "
	"first year of their existence was \"on the one hand, to print all that is most "
	"valuable of the yet unprinted MSS. in English, and, on the other, to re-edit "
	"and reprint all that is most valuable in printed English books, which from their "
	"scarcity or price are not within the reach of the student of moderate means.\"[1]"
	"It is known for being the first to print many English "
	"manuscripts, including Cotton Nero A.x, which contains Pearl, Sir Gawain and the "
	"Green Knight, and other poems. By its own count, the Society has published 344"
	"volumes. Famous members of the society when it was formed in 1864 include "
	"Furnivall himself, Alfred Tennyson (poet laureate), Warren de la Rue (inventor "
	"of the lightbulb), Richard Chenevix Trench (Irish ecclesiastic), Stephen Austin "
	"(a Hertford-based printer), Edith Coleridge (granddaughter of Samuel Taylor "
	"Coleridge), and others.";

	ssbuf compressed;
	ss_bufinit(&compressed);

	/* compress: the trailing NUL of text is not part of the input */
	ssfilter f;
	t( ss_filterinit(&f, &ss_lz4filter, &st_r.a, SS_FINPUT) == 0 );
	t( ss_filterstart(&f, &compressed) == 0 );
	t( ss_filternext(&f, &compressed, text, sizeof(text) - 1) == 0 );
	t( ss_filtercomplete(&f, &compressed) == 0 );
	t( ss_filterfree(&f) == 0 );

	/* destination buffer is pre-sized to hold the whole original text */
	ssbuf decompressed;
	ss_bufinit(&decompressed);
	t( ss_bufensure(&decompressed, &st_r.a, sizeof(text)) == 0 );

	/* decompress everything in a single step */
	t( ss_filterinit(&f, &ss_lz4filter, &st_r.a, SS_FOUTPUT) == 0 );
	t( ss_filternext(&f, &decompressed, compressed.s, ss_bufused(&compressed)) == 0 );
	t( ss_filterfree(&f) == 0 );

	/* the round trip must reproduce the input byte for byte */
	t( memcmp(text, decompressed.s, sizeof(text) - 1) == 0 );

	ss_buffree(&compressed, &st_r.a);
	ss_buffree(&decompressed, &st_r.a);
}
Example #11
0
/*
 * Append object `v` to the page being built in `b`.
 *
 * Writes an sdv metadata record (flags and value offset relative to the
 * page start) into the meta region, copies the object in the format
 * selected by r->fmt_storage, and updates the page header statistics
 * (count, lsn range, duplicate counters) and b->vmax.
 *
 * `flags` is OR-ed into the flags reported by sv_flags(v).
 *
 * Returns 0 on success, the value of sr_oom() when the meta region cannot
 * grow, or -1 when copying the object fails.
 */
int sd_buildadd(sdbuild *b, sr *r, sv *v, uint32_t flags)
{
	/* prepare object metadata */
	if (ssunlikely(ss_bufensure(&b->m, r->a, sizeof(sdv)) == -1))
		return sr_oom(r->e);
	sdpageheader *hdr = sd_buildheader(b);
	sdv *meta = (sdv*)b->m.p;
	meta->flags  = sv_flags(v) | flags;
	meta->offset = ss_bufused(&b->v) - sd_buildref(b)->v;
	ss_bufadvance(&b->m, sizeof(sdv));

	/* copy object; an unrecognised storage format copies nothing */
	int rc = 0;
	if (r->fmt_storage == SF_SKEYVALUE)
		rc = sd_buildadd_keyvalue(b, r, v);
	else if (r->fmt_storage == SF_SRAW)
		rc = sd_buildadd_raw(b, r, v);
	if (ssunlikely(rc == -1))
		return -1;

	/* update page header */
	hdr->count++;
	uint32_t object_size = sizeof(sdv) + sv_size(v) +
		sizeof(sfref) * r->scheme->count;
	if (object_size > b->vmax)
		b->vmax = object_size;

	uint64_t lsn = sv_lsn(v);
	if (lsn > hdr->lsnmax)
		hdr->lsnmax = lsn;
	if (lsn < hdr->lsnmin)
		hdr->lsnmin = lsn;

	if (meta->flags & SVDUP) {
		hdr->countdup++;
		if (lsn < hdr->lsnmindup)
			hdr->lsnmindup = lsn;
	}
	return 0;
}
Example #12
0
/*
 * Save/load round-trip test for sfscheme: build a two-field scheme
 * (a u32 key and a string value), serialize it into a buffer, load it into
 * a freshly initialized scheme and verify field count, key count, names,
 * types and key flags.
 */
static void
sf_scheme_saveload(void)
{
	sfscheme cmp;
	sf_schemeinit(&cmp);

	/* field 0: "key", u32, first key part */
	sffield *field;
	field = sf_fieldnew(&st_r.a, "key");
	t( field != NULL );
	t( sf_fieldoptions(field, &st_r.a, "u32,key(0)") == 0);
	t( sf_schemeadd(&cmp, &st_r.a, field) == 0);

	/* field 1: "value", string, not a key */
	field = sf_fieldnew(&st_r.a, "value");
	t( field != NULL );
	t( sf_fieldoptions(field, &st_r.a, "string") == 0);
	t( sf_schemeadd(&cmp, &st_r.a, field) == 0);
	t( sf_schemevalidate(&cmp, &st_r.a) == 0 );

	/* serialize, then drop the original scheme */
	ssbuf buf;
	ss_bufinit(&buf);
	t( sf_schemesave(&cmp, &st_r.a, &buf) == 0 );
	sf_schemefree(&cmp, &st_r.a);

	/* load into a clean scheme object */
	sf_schemeinit(&cmp);
	t( sf_schemeload(&cmp, &st_r.a, buf.s, ss_bufused(&buf)) == 0 );
	t( sf_schemevalidate(&cmp, &st_r.a) == 0 );

	/* the loaded scheme must match what was saved */
	t( cmp.fields_count == 2 );
	t( cmp.keys_count == 1 );
	t( strcmp(cmp.fields[0]->name, "key") == 0 );
	t( cmp.fields[0]->type == SS_U32 );
	t( cmp.fields[0]->key == 1 );
	t( cmp.fields[1]->type == SS_STRING );
	t( cmp.fields[1]->key == 0 );

	sf_schemefree(&cmp, &st_r.a);
	ss_buffree(&buf, &st_r.a);
}
Example #13
0
/*
 * Save/load round-trip test for srscheme: build a scheme with a single
 * u32 key part named "key", serialize it, load it into a freshly
 * initialized scheme and verify the part count, name and type.
 */
static void
sr_scheme_saveload(void)
{
	srscheme cmp;
	sr_schemeinit(&cmp);
	srkey *part = sr_schemeadd(&cmp, &st_r.a);
	t( sr_keysetname(part, &st_r.a, "key") == 0 );
	t( sr_keyset(part, &st_r.a, "u32") == 0 );

	/* serialize, then drop the original scheme */
	ssbuf buf;
	ss_bufinit(&buf);
	t( sr_schemesave(&cmp, &st_r.a, &buf) == 0 );
	sr_schemefree(&cmp, &st_r.a);

	/* load into a clean scheme object */
	sr_schemeinit(&cmp);
	t( sr_schemeload(&cmp, &st_r.a, buf.s, ss_bufused(&buf)) == 0 );

	/* the loaded scheme must match what was saved */
	t( cmp.count == 1 );
	t( strcmp(cmp.parts[0].name, "key") == 0 );
	t( cmp.parts[0].type == SS_U32 );

	sr_schemefree(&cmp, &st_r.a);
	ss_buffree(&buf, &st_r.a);
}
Example #14
0
/*
 * Append object `v` to the page being built in `b` using the key-value
 * storage layout.
 *
 * Value region (b->v) receives, in order: leb128(total size), leb128(lsn),
 * one leb128 key offset per key part, then the raw value bytes.
 * Key region (b->k) receives leb128(part size) followed by the part bytes
 * for every key part that is not a duplicate. With b->compress_dup set, a
 * part already present in b->tracker is not copied again; its existing
 * offset is referenced instead.
 *
 * Returns 0 on success, or the value of sr_oom() on allocation failure.
 */
static inline int
sd_buildadd_keyvalue(sdbuild *b, sr *r, sv *v)
{
	/* calculate key size */
	uint32_t keysize = 0;
	int i;
	for (i = 0; i < r->scheme->count; i++)
		keysize += sv_keysize(v, r, i);
	uint32_t valuesize = sv_valuesize(v, r);
	uint32_t size = keysize + valuesize;

	/* prepare buffer */
	uint64_t lsn = sv_lsn(v);
	uint32_t sizemeta = ss_leb128size(size) + ss_leb128size(lsn);
	int rc = ss_bufensure(&b->v, r->a, sizemeta);
	if (ssunlikely(rc == -1))
		return sr_oom(r->e);

	/* write meta */
	ss_bufadvance(&b->v, ss_leb128write(b->v.p, size));
	ss_bufadvance(&b->v, ss_leb128write(b->v.p, lsn));

	/* write key-parts */
	for (i = 0; i < r->scheme->count; i++)
	{
		uint32_t partsize = sv_keysize(v, r, i);
		char *part = sv_key(v, r, i);

		/* offset of this part relative to the page's key region */
		int offsetstart = ss_bufused(&b->k);
		int offset = (offsetstart - sd_buildref(b)->k);

		/* match a key copy */
		int is_duplicate = 0;
		uint32_t hash = 0;
		int pos = 0;
		if (b->compress_dup) {
			hash = ss_fnv(part, partsize);
			pos = sd_buildsearch(&b->tracker, hash, part, partsize, b);
			if (b->tracker.i[pos]) {
				is_duplicate = 1;
				sdbuildkey *ref = sscast(b->tracker.i[pos], sdbuildkey, node);
				offset = ref->offset;
			}
		}

		/* offset */
		rc = ss_bufensure(&b->v, r->a, ss_leb128size(offset));
		if (ssunlikely(rc == -1))
			return sr_oom(r->e);
		ss_bufadvance(&b->v, ss_leb128write(b->v.p, offset));
		if (is_duplicate)
			continue;

		/* copy key */
		int partsize_meta = ss_leb128size(partsize);
		rc = ss_bufensure(&b->k, r->a, partsize_meta + partsize);
		if (ssunlikely(rc == -1))
			return sr_oom(r->e);
		ss_bufadvance(&b->k, ss_leb128write(b->k.p, partsize));
		memcpy(b->k.p, part, partsize);
		ss_bufadvance(&b->k, partsize);

		/* add key reference */
		if (b->compress_dup) {
			if (ssunlikely(ss_htisfull(&b->tracker))) {
				rc = ss_htresize(&b->tracker, r->a);
				if (ssunlikely(rc == -1))
					return sr_oom(r->e);
				/* NOTE(review): presumably a resize rehashes the table,
				 * which would invalidate the slot found above; look the
				 * slot up again (a no-op if the layout is unchanged) */
				pos = sd_buildsearch(&b->tracker, hash, part, partsize, b);
			}
			sdbuildkey *ref = ss_malloc(r->a, sizeof(sdbuildkey));
			/* check the allocation itself; rc still holds the result
			 * of an earlier, successful call at this point */
			if (ssunlikely(ref == NULL))
				return sr_oom(r->e);
			ref->node.hash = hash;
			ref->offset = offset;
			ref->offsetstart = offsetstart + partsize_meta;
			ref->size = partsize;
			ss_htset(&b->tracker, pos, &ref->node);
		}
	}

	/* write value */
	rc = ss_bufensure(&b->v, r->a, valuesize);
	if (ssunlikely(rc == -1))
		return sr_oom(r->e);
	memcpy(b->v.p, sv_value(v, r), valuesize);
	ss_bufadvance(&b->v, valuesize);
	return 0;
}
Example #15
0
/*
 * Write the in-memory index `vindex` of node `parent` to the end of the
 * node file as a new branch and return the branch object.
 *
 * Steps: write an open seal, write every merged page, commit and write the
 * branch index, optionally sync, then seal the branch (and sync again when
 * scheme->sync == 2). For an in-memory parent the data is also mirrored
 * into a blob which ends up owned by the branch. With scheme->mmap set the
 * grown file is mapped into parent->map_swap.
 *
 * Returns the new branch, or NULL on failure. On the failure paths handled
 * here the merge source buffer, the merge state and the blob are released.
 */
static inline sibranch*
si_branchcreate(si *index, sdc *c, sinode *parent, svindex *vindex, uint64_t vlsn)
{
	sr *r = index->r;
	sibranch *branch = NULL;

	/* in-memory mode blob */
	int rc;
	ssblob copy, *blob = NULL;
	if (parent->in_memory) {
		ss_blobinit(&copy, r->vfs);
		rc = ss_blobensure(&copy, 10ULL * 1024 * 1024);
		if (ssunlikely(rc == -1)) {
			sr_oom_malfunction(r->e);
			return NULL;
		}
		blob = &copy;
	}

	svmerge vmerge;
	sv_mergeinit(&vmerge);
	rc = sv_mergeprepare(&vmerge, r, 1);
	if (ssunlikely(rc == -1)) {
		/* do not leak the blob reserved above */
		if (blob)
			ss_blobfree(blob);
		return NULL;
	}
	svmergesrc *s = sv_mergeadd(&vmerge, NULL);
	ss_iterinit(sv_indexiter, &s->src);
	ss_iteropen(sv_indexiter, &s->src, r, vindex, SS_GTE, NULL, 0);
	ssiter i;
	ss_iterinit(sv_mergeiter, &i);
	ss_iteropen(sv_mergeiter, &i, r, &vmerge, SS_GTE);

	/* merge iter is not used */
	sdmergeconf mergeconf = {
		.size_stream     = UINT32_MAX,
		.size_node       = UINT64_MAX,
		.size_page       = index->scheme->node_page_size,
		.checksum        = index->scheme->node_page_checksum,
		.compression_key = index->scheme->compression_key,
		.compression     = index->scheme->compression_branch,
		.compression_if  = index->scheme->compression_branch_if,
		.vlsn            = vlsn,
		.vlsn_lru        = 0,
		.save_delete     = 1,
		.save_upsert     = 1
	};
	sdmerge merge;
	sd_mergeinit(&merge, r, &i, &c->build, &c->upsert, &mergeconf);

	while ((rc = sd_merge(&merge)) > 0)
	{
		/* a branch is produced by a single merge pass */
		assert(branch == NULL);

		/* write open seal */
		uint64_t seal = parent->file.size;
		rc = sd_writeseal(r, &parent->file, blob);
		if (ssunlikely(rc == -1))
			goto e0;

		/* write pages */
		uint64_t offset = parent->file.size;
		while ((rc = sd_mergepage(&merge, offset)) == 1)
		{
			rc = sd_writepage(r, &parent->file, blob, merge.build);
			if (ssunlikely(rc == -1))
				goto e0;
			offset = parent->file.size;
		}
		if (ssunlikely(rc == -1))
			goto e0;
		sdid id = {
			.parent = parent->self.id.id,
			.flags  = SD_IDBRANCH,
			.id     = sr_seq(r->seq, SR_NSNNEXT)
		};
		rc = sd_mergecommit(&merge, &id, parent->file.size);
		if (ssunlikely(rc == -1))
			goto e0;

		/* write index */
		rc = sd_writeindex(r, &parent->file, blob, &merge.index);
		if (ssunlikely(rc == -1))
			goto e0;
		if (index->scheme->sync) {
			rc = ss_filesync(&parent->file);
			if (ssunlikely(rc == -1)) {
				sr_malfunction(r->e, "file '%s' sync error: %s",
				               ss_pathof(&parent->file.path),
				               strerror(errno));
				goto e0;
			}
		}

		SS_INJECTION(r->i, SS_INJECTION_SI_BRANCH_0,
		             sd_mergefree(&merge);
		             sr_malfunction(r->e, "%s", "error injection");
		             return NULL);

		/* seal the branch */
		rc = sd_seal(r, &parent->file, blob, &merge.index, seal);
		if (ssunlikely(rc == -1))
			goto e0;
		if (index->scheme->sync == 2) {
			rc = ss_filesync(&parent->file);
			if (ssunlikely(rc == -1)) {
				sr_malfunction(r->e, "file '%s' sync error: %s",
				               ss_pathof(&parent->file.path),
				               strerror(errno));
				goto e0;
			}
		}

		/* create new branch object */
		branch = si_branchnew(r);
		if (ssunlikely(branch == NULL))
			goto e0;
		si_branchset(branch, &merge.index);
	}

	if (ssunlikely(rc == -1)) {
		sr_oom_malfunction(r->e);
		goto e0;
	}
	/* the merge sources are released exactly once: here on success,
	 * or in e0 on any failure up to this point */
	sv_mergefree(&vmerge, r->a);
	assert(branch != NULL);

	/* in-memory mode support */
	if (blob) {
		rc = ss_blobfit(blob);
		if (ssunlikely(rc == -1)) {
			ss_blobfree(blob);
			goto e1;
		}
		branch->copy = copy;
	}
	/* mmap support */
	if (index->scheme->mmap) {
		ss_mmapinit(&parent->map_swap);
		rc = ss_vfsmmap(r->vfs, &parent->map_swap, parent->file.fd,
		              parent->file.size, 1);
		if (ssunlikely(rc == -1)) {
			sr_malfunction(r->e, "db file '%s' mmap error: %s",
			               ss_pathof(&parent->file.path),
			               strerror(errno));
			goto e1;
		}
	}
	return branch;
e0:
	/* failure before the branch was completed */
	sv_mergefree(&vmerge, r->a);
	sd_mergefree(&merge);
	if (blob)
		ss_blobfree(blob);
	return NULL;
e1:
	/* failure after the branch object took over the merge index */
	si_branchfree(branch, r);
	return NULL;
}

/*
 * Execute a branch plan: flush the in-memory index of plan->node to disk
 * as a new branch and link that branch into the node.
 *
 * The node must already carry SI_LOCK. The in-memory index is rotated
 * under the index lock, written out without the lock held, and the result
 * is committed under the lock again.
 *
 * Returns 1 when a branch was created, 0 when the node had nothing to
 * flush (n->used == 0), and -1 when branch creation or munmap fails.
 */
int si_branch(si *index, sdc *c, siplan *plan, uint64_t vlsn)
{
	sr *r = index->r;
	sinode *n = plan->node;
	assert(n->flags & SI_LOCK);

	si_lock(index);
	/* nothing to flush: release the node and leave */
	if (ssunlikely(n->used == 0)) {
		si_nodeunlock(n);
		si_unlock(index);
		return 0;
	}
	svindex *i;
	i = si_noderotate(n);
	si_unlock(index);

	/* the branch is written outside of the index lock */
	sibranch *branch = si_branchcreate(index, c, n, i, vlsn);
	if (ssunlikely(branch == NULL))
		return -1;

	/* commit */
	si_lock(index);
	/* the new branch becomes the head of the node's branch list */
	branch->next = n->branch;
	n->branch->link = branch;
	n->branch = branch;
	n->branch_count++;
	/* the flushed index no longer counts against the node or the quota */
	uint32_t used = sv_indexused(i);
	n->used -= used;
	ss_quota(r->quota, SS_QREMOVE, used);
	index->size +=
		sd_indexsize(branch->index.h) +
		sd_indextotal(&branch->index);
	/* keep a copy of the flushed index so it can be freed after unlock */
	svindex swap = *i;
	si_nodeunrotate(n);
	si_nodeunlock(n);
	si_plannerupdate(&index->p, SI_BRANCH|SI_COMPACT, n);
	/* switch to the mapping prepared by si_branchcreate(); the previous
	 * one is unmapped below, outside of the lock */
	ssmmap swap_map = n->map;
	n->map = n->map_swap;
	memset(&n->map_swap, 0, sizeof(n->map_swap));
	si_unlock(index);

	/* gc */
	if (index->scheme->mmap) {
		int rc = ss_vfsmunmap(r->vfs, &swap_map);
		if (ssunlikely(rc == -1)) {
			sr_malfunction(r->e, "db file '%s' munmap error: %s",
			               ss_pathof(&n->file.path),
			               strerror(errno));
			return -1;
		}
	}
	si_nodegc_index(r, &swap);
	return 1;
}

/*
 * Compact plan->node: merge all of its branches (and, optionally, an
 * in-memory index stream) and hand the merged stream to si_merge().
 *
 * index        index that owns the node
 * c            compaction context providing per-branch read buffers
 * plan         plan whose node must already carry SI_LOCK
 * vlsn         passed through to si_merge()
 * vlsn_lru     passed through to si_merge()
 * vindex       optional iterator over an in-memory index to include
 *              in the merge (may be NULL)
 * vindex_used  size accounted for the vindex stream
 *
 * When scheme->node_compact_load is set, the node file is first read into
 * c->c and the branches are read from that copy.
 *
 * Returns the result of si_merge(), the value of sr_oom_malfunction() when
 * preparing the context or opening a branch reader fails, or -1 on other
 * preparation failures. The merge source buffer is released on every path
 * after it has been prepared.
 */
int si_compact(si *index, sdc *c, siplan *plan,
               uint64_t vlsn,
               uint64_t vlsn_lru,
               ssiter *vindex,
               uint64_t vindex_used)
{
	sr *r = index->r;
	sinode *node = plan->node;
	assert(node->flags & SI_LOCK);

	/* prepare for compaction */
	int rc;
	rc = sd_censure(c, r, node->branch_count);
	if (ssunlikely(rc == -1))
		return sr_oom_malfunction(r->e);
	svmerge merge;
	sv_mergeinit(&merge);
	/* one source per branch plus one for the optional vindex */
	rc = sv_mergeprepare(&merge, r, node->branch_count + 1);
	if (ssunlikely(rc == -1))
		return -1;

	/* read node file into memory */
	int use_mmap = index->scheme->mmap;
	ssmmap *map = &node->map;
	ssmmap  preload;
	if (index->scheme->node_compact_load) {
		rc = si_noderead(node, r, &c->c);
		if (ssunlikely(rc == -1)) {
			sv_mergefree(&merge, r->a);
			return -1;
		}
		/* present the preloaded copy to the readers as if it were a map */
		preload.p = c->c.s;
		preload.size = ss_bufused(&c->c);
		map = &preload;
		use_mmap = 1;
	}

	/* include vindex into merge process */
	svmergesrc *s;
	uint64_t size_stream = 0;
	if (vindex) {
		s = sv_mergeadd(&merge, vindex);
		size_stream = vindex_used;
	}

	/* one reader per branch, each with its own buffers from c */
	sdcbuf *cbuf = c->head;
	sibranch *b = node->branch;
	while (b) {
		s = sv_mergeadd(&merge, NULL);
		/* choose compression type */
		int compression;
		ssfilterif *compression_if;
		if (! si_branchis_root(b)) {
			compression    = index->scheme->compression_branch;
			compression_if = index->scheme->compression_branch_if;
		} else {
			compression    = index->scheme->compression;
			compression_if = index->scheme->compression_if;
		}
		sdreadarg arg = {
			.index           = &b->index,
			.buf             = &cbuf->a,
			.buf_xf          = &cbuf->b,
			.buf_read        = &c->d,
			.index_iter      = &cbuf->index_iter,
			.page_iter       = &cbuf->page_iter,
			.use_memory      = node->in_memory,
			.use_mmap        = use_mmap,
			.use_mmap_copy   = 0,
			.use_compression = compression,
			.compression_if  = compression_if,
			.has             = 0,
			.has_vlsn        = 0,
			.o               = SS_GTE,
			.memory          = &b->copy,
			.mmap            = map,
			.file            = &node->file,
			.r               = r
		};
		ss_iterinit(sd_read, &s->src);
		rc = ss_iteropen(sd_read, &s->src, &arg, NULL, 0);
		if (ssunlikely(rc == -1)) {
			sv_mergefree(&merge, r->a);
			return sr_oom_malfunction(r->e);
		}
		size_stream += sd_indextotal(&b->index);
		cbuf = cbuf->next;
		b = b->next;
	}
	ssiter i;
	ss_iterinit(sv_mergeiter, &i);
	ss_iteropen(sv_mergeiter, &i, r, &merge, SS_GTE);
	rc = si_merge(index, c, node, vlsn, vlsn_lru, &i, size_stream);
	sv_mergefree(&merge, r->a);
	return rc;
}

/*
 * Compact plan->node together with its current in-memory index.
 *
 * Under the index lock the node's in-memory index is rotated; an iterator
 * over the rotated index is then passed to si_compact() along with the
 * size reported by sv_indexused().
 *
 * Returns 0 (after unlocking the node) when the node has nothing in memory
 * (node->used == 0), otherwise the result of si_compact().
 */
int si_compact_index(si *index, sdc *c, siplan *plan,
                     uint64_t vlsn,
                     uint64_t vlsn_lru)
{
	sinode *node = plan->node;
	svindex *rotated = NULL;

	si_lock(index);
	int is_empty = (node->used == 0);
	if (is_empty)
		si_nodeunlock(node);
	else
		rotated = si_noderotate(node);
	si_unlock(index);

	if (is_empty)
		return 0;

	uint64_t stream_size = sv_indexused(rotated);
	ssiter memiter;
	ss_iterinit(sv_indexiter, &memiter);
	ss_iteropen(sv_indexiter, &memiter, index->r, rotated, SS_GTE, NULL, 0);
	return si_compact(index, c, plan, vlsn, vlsn_lru, &memiter, stream_size);
}
Example #16
0
/*
 * Write a snapshot of the index layout to "<scheme->path>/index".
 *
 * The image (every node with its branches, followed by the lru and read
 * counters) is assembled in memory under the index lock. It is then
 * written to "<path>/index.incomplete", optionally synced, the previous
 * "<path>/index" is unlinked and the new file is renamed into place.
 * On success index->snapshot is set to plan->a and index->snapshot_run
 * is cleared.
 *
 * Returns 0 on success, -1 on any failure (out of memory or file error).
 */
int si_snapshot(si *index, siplan *plan)
{
	sr *r = index->r;

	ssfile file;
	ss_fileinit(&file, r->vfs);

	/* prepare to take snapshot */
	sdsnapshot snapshot;
	sd_snapshot_init(&snapshot);
	int rc = ss_bufensure(&snapshot.buf, r->a, 1 * 1024 * 1024);
	if (ssunlikely(rc == -1))
		goto error_oom;
	rc = sd_snapshot_begin(&snapshot, r);
	if (ssunlikely(rc == -1))
		goto error_oom;

	/* save node index image */
	si_lock(index);
	ssrbnode *p = NULL;
	while ((p = ss_rbnext(&index->i, p)))
	{
		sinode *n = sscast(p, sinode, node);
		rc = sd_snapshot_add(&snapshot, r, n->self.id.id,
		                     n->file.size,
		                     n->branch_count,
		                     n->temperature_reads);
		if (ssunlikely(rc == -1)) {
			si_unlock(index);
			goto error_oom;
		}
		/* walk the branches through ->link, starting from the node itself */
		sibranch *b = &n->self;
		while (b) {
			rc = sd_snapshot_addbranch(&snapshot, r, b->index.h);
			if (ssunlikely(rc == -1)) {
				si_unlock(index);
				goto error_oom;
			}
			b = b->link;
		}
	}
	sd_snapshot_commit(&snapshot, r,
	                   index->lru_v,
	                   index->lru_steps,
	                   index->lru_intr_lsn,
	                   index->lru_intr_sum,
	                   index->read_disk,
	                   index->read_cache);
	si_unlock(index);

	/* create index.incomplete */
	char path[PATH_MAX];
	snprintf(path, sizeof(path), "%s/index.incomplete",
	         index->scheme->path);
	rc = ss_filenew(&file, path);
	if (ssunlikely(rc == -1)) {
		sr_malfunction(r->e, "index file '%s' create error: %s",
		               path, strerror(errno));
		goto error;
	}
	rc = ss_filewrite(&file, snapshot.buf.s, ss_bufused(&snapshot.buf));
	if (ssunlikely(rc == -1)) {
		sr_malfunction(r->e, "index file '%s' write error: %s",
		               path, strerror(errno));
		goto error;
	}

	SS_INJECTION(r->i, SS_INJECTION_SI_SNAPSHOT_0,
	             ss_fileclose(&file);
	             sd_snapshot_free(&snapshot, r);
				 sr_malfunction(r->e, "%s", "error injection");
				 return -1);

	/* sync snapshot file */
	if (index->scheme->sync) {
		rc = ss_filesync(&file);
		if (ssunlikely(rc == -1)) {
			sr_malfunction(r->e, "index file '%s' sync error: %s",
			               path, strerror(errno));
			goto error;
		}
	}

	/* remove old snapshot file (if exists); from here on `path` names
	 * the final "index" file and the unlink result is ignored */
	snprintf(path, sizeof(path), "%s/index", index->scheme->path);
	ss_vfsunlink(r->vfs, path);

	SS_INJECTION(r->i, SS_INJECTION_SI_SNAPSHOT_1,
	             ss_fileclose(&file);
	             sd_snapshot_free(&snapshot, r);
				 sr_malfunction(r->e, "%s", "error injection");
				 return -1);

	/* rename index.incomplete to index */
	rc = ss_filerename(&file, path);
	if (ssunlikely(rc == -1)) {
		sr_malfunction(r->e, "index file '%s' rename error: %s",
		               ss_pathof(&file.path),
		               strerror(errno));
		goto error;
	}

	SS_INJECTION(r->i, SS_INJECTION_SI_SNAPSHOT_2,
	             ss_fileclose(&file);
	             sd_snapshot_free(&snapshot, r);
				 sr_malfunction(r->e, "%s", "error injection");
				 return -1);

	/* close snapshot file */
	rc = ss_fileclose(&file);
	if (ssunlikely(rc == -1)) {
		sr_malfunction(r->e, "index file '%s' close error: %s",
		               path, strerror(errno));
		goto error;
	}

	sd_snapshot_free(&snapshot, r);

	/* finish index snapshot */
	si_lock(index);
	index->snapshot = plan->a;
	index->snapshot_run = 0;
	si_unlock(index);
	return 0;

	/* error_oom reports the allocation failure and falls through to the
	 * common cleanup */
error_oom:
	sr_oom(r->e);
error:
	ss_fileclose(&file);
	sd_snapshot_free(&snapshot, r);
	return -1;
}
Example #17
0
/*
 * Split the merge stream `i` into new nodes.
 *
 * Every successful sd_merge() pass produces one node: a fresh sinode is
 * allocated, given a new sequence id whose parent is `parent`, committed,
 * created through si_nodecreate() and appended to `result` (an array of
 * sinode pointers). The build context is reset between nodes.
 *
 * Returns 0 on success. On failure every node already collected in
 * `result` is released through si_splitfree(), the merge state is freed
 * and -1 is returned.
 */
static inline int
si_split(si *index, sdc *c, ssbuf *result,
         sinode   *parent,
         ssiter   *i,
         uint64_t  size_node,
         uint32_t  size_stream,
         uint64_t  vlsn)
{
	sr *r = index->r;
	sdmergeconf mergeconf = {
		.size_stream     = size_stream,
		.size_node       = size_node,
		.size_page       = index->scheme->node_page_size,
		.checksum        = index->scheme->node_page_checksum,
		.compression     = index->scheme->compression,
		.compression_key = index->scheme->compression_key,
		.offset          = 0,
		.vlsn            = vlsn,
		.save_delete     = 0,
		.save_update     = 0
	};
	sdmerge merge;
	sd_mergeinit(&merge, r, i, &c->build, &c->update, &mergeconf);

	int rc;
	for (;;) {
		rc = sd_merge(&merge);
		if (rc <= 0)
			break;

		/* NOTE(review): on the two failures below the fresh node is not
		 * in `result` yet, so it looks like it is never released;
		 * confirm against si_nodecreate()/si_nodefree() */
		sinode *fresh = si_nodenew(r);
		if (ssunlikely(fresh == NULL))
			goto error;
		sdid id = {
			.parent = parent->self.id.id,
			.flags  = 0,
			.id     = sr_seq(index->r->seq, SR_NSNNEXT)
		};
		if (ssunlikely(sd_mergecommit(&merge, &id) == -1))
			goto error;
		rc = si_nodecreate(fresh, r, index->scheme, &id, &merge.index, &c->build);
		if (ssunlikely(rc == -1))
			goto error;

		if (ssunlikely(ss_bufadd(result, index->r->a, &fresh, sizeof(sinode*)) == -1)) {
			sr_oom_malfunction(index->r->e);
			si_nodefree(fresh, r, 1);
			goto error;
		}
		sd_buildreset(&c->build);
	}
	/* the loop ends on 0 (stream exhausted) or on a merge error */
	if (ssunlikely(rc == -1))
		goto error;
	return 0;

error:
	si_splitfree(result, r);
	sd_mergefree(&merge);
	return -1;
}

/*
 * Compact `node` by merging `stream` into a set of new nodes and swapping
 * them into the index in place of `node`.
 *
 * Phases, in order:
 *   1. si_split() builds the new nodes into c->a (`result`).
 *   2. Under the index lock the old node is removed from the planner and,
 *      depending on how many nodes were produced, deleted, replaced by a
 *      single node, or replaced by several nodes.
 *   3. The new nodes are optionally synced and then sealed, the old node is
 *      freed, the new nodes are completed, and finally they are unlocked
 *      under the index lock.
 *
 * Returns 0 on success and -1 on failure.
 *
 * NOTE(review): failures after the commit phase return -1 without unlocking
 * the new nodes or releasing `result`; presumably the caller treats this as
 * a malfunction state - confirm.
 */
int si_compaction(si *index, sdc *c, uint64_t vlsn,
                  sinode *node,
                  ssiter *stream, uint32_t size_stream)
{
	sr *r = index->r;
	ssbuf *result = &c->a;
	ssiter i;

	/* begin compaction.
	 *
	 * split merge stream into a number
	 * of a new nodes.
	 */
	int rc;
	rc = si_split(index, c, result,
	              node, stream,
	              index->scheme->node_size,
	              size_stream,
	              vlsn);
	if (ssunlikely(rc == -1))
		return -1;

	SS_INJECTION(r->i, SS_INJECTION_SI_COMPACTION_0,
	             si_splitfree(result, r);
	             sr_malfunction(r->e, "%s", "error injection");
	             return -1);

	/* mask removal of a single node as a
	 * single node update */
	/* result holds sinode pointers, so this is the number of new nodes */
	int count = ss_bufused(result) / sizeof(sinode*);
	int count_index;

	/* read the current node count of the index under its lock */
	si_lock(index);
	count_index = index->n;
	si_unlock(index);

	sinode *n;
	if (ssunlikely(count == 0 && count_index == 1))
	{
		/* the merge produced nothing and this is the only node in the
		 * index: create a bootstrap node so the "self update" case below
		 * is taken instead of "delete" */
		n = si_bootstrap(index, node->self.id.id);
		if (ssunlikely(n == NULL))
			return -1;
		rc = ss_bufadd(result, r->a, &n, sizeof(sinode*));
		if (ssunlikely(rc == -1)) {
			sr_oom_malfunction(r->e);
			si_nodefree(n, r, 1);
			return -1;
		}
		count++;
	}

	/* commit compaction changes */
	si_lock(index);
	/* in-memory index of the old node, handed over or redistributed below */
	svindex *j = si_nodeindex(node);
	si_plannerremove(&index->p, SI_COMPACT|SI_BRANCH, node);
	switch (count) {
	case 0: /* delete */
		si_remove(index, node);
		si_redistribute_index(index, r, c, node);
		/* give back the quota accounted for the old in-memory index */
		uint32_t used = sv_indexused(j);
		if (used) {
			ss_quota(r->quota, SS_QREMOVE, used);
		}
		break;
	case 1: /* self update */
		n = *(sinode**)result->s;
		/* the single new node takes over the old in-memory index by
		 * struct copy */
		n->i0   = *j;
		n->used = sv_indexused(j);
		si_nodelock(n);
		si_replace(index, node, n);
		si_plannerupdate(&index->p, SI_COMPACT|SI_BRANCH, n);
		break;
	default: /* split */
		rc = si_redistribute(index, r, c, node, result);
		if (ssunlikely(rc == -1)) {
			si_unlock(index);
			si_splitfree(result, r);
			return -1;
		}
		/* the first new node replaces the old one in the index ... */
		ss_iterinit(ss_bufiterref, &i);
		ss_iteropen(ss_bufiterref, &i, result, sizeof(sinode*));
		n = ss_iterof(ss_bufiterref, &i);
		n->used = sv_indexused(&n->i0);
		si_nodelock(n);
		si_replace(index, node, n);
		si_plannerupdate(&index->p, SI_COMPACT|SI_BRANCH, n);
		/* ... and every following node is inserted as a new entry */
		for (ss_iternext(ss_bufiterref, &i); ss_iterhas(ss_bufiterref, &i);
		     ss_iternext(ss_bufiterref, &i)) {
			n = ss_iterof(ss_bufiterref, &i);
			n->used = sv_indexused(&n->i0);
			si_nodelock(n);
			si_insert(index, n);
			si_plannerupdate(&index->p, SI_COMPACT|SI_BRANCH, n);
		}
		break;
	}
	/* re-initialise the old node's index after it was copied or
	 * redistributed above */
	sv_indexinit(j);
	si_unlock(index);

	/* compaction completion */

	/* seal nodes */
	ss_iterinit(ss_bufiterref, &i);
	ss_iteropen(ss_bufiterref, &i, result, sizeof(sinode*));
	while (ss_iterhas(ss_bufiterref, &i))
	{
		n = ss_iterof(ss_bufiterref, &i);
		/* sync the node first when the scheme asks for it */
		if (index->scheme->sync) {
			rc = si_nodesync(n, r);
			if (ssunlikely(rc == -1))
				return -1;
		}
		rc = si_nodeseal(n, r, index->scheme);
		if (ssunlikely(rc == -1))
			return -1;
		SS_INJECTION(r->i, SS_INJECTION_SI_COMPACTION_3,
		             si_nodefree(node, r, 0);
		             sr_malfunction(r->e, "%s", "error injection");
		             return -1);
		ss_iternext(ss_bufiterref, &i);
	}

	SS_INJECTION(r->i, SS_INJECTION_SI_COMPACTION_1,
	             si_nodefree(node, r, 0);
	             sr_malfunction(r->e, "%s", "error injection");
	             return -1);

	/* gc old node */
	rc = si_nodefree(node, r, 1);
	if (ssunlikely(rc == -1))
		return -1;

	SS_INJECTION(r->i, SS_INJECTION_SI_COMPACTION_2,
	             sr_malfunction(r->e, "%s", "error injection");
	             return -1);

	/* complete new nodes */
	ss_iterinit(ss_bufiterref, &i);
	ss_iteropen(ss_bufiterref, &i, result, sizeof(sinode*));
	while (ss_iterhas(ss_bufiterref, &i))
	{
		n = ss_iterof(ss_bufiterref, &i);
		rc = si_nodecomplete(n, r, index->scheme);
		if (ssunlikely(rc == -1))
			return -1;
		SS_INJECTION(r->i, SS_INJECTION_SI_COMPACTION_4,
		             sr_malfunction(r->e, "%s", "error injection");
		             return -1);
		ss_iternext(ss_bufiterref, &i);
	}

	/* unlock */
	/* release the node locks taken during the commit phase */
	si_lock(index);
	ss_iterinit(ss_bufiterref, &i);
	ss_iteropen(ss_bufiterref, &i, result, sizeof(sinode*));
	while (ss_iterhas(ss_bufiterref, &i))
	{
		n = ss_iterof(ss_bufiterref, &i);
		si_nodeunlock(n);
		ss_iternext(ss_bufiterref, &i);
	}
	si_unlock(index);
	return 0;
}
Example #18
0
/*
 * Append a page descriptor for the page currently held in `build` to the
 * index `i`, and fold the page's statistics into the index header.
 *
 * `offset` is stored in the descriptor as the page offset.
 *
 * Returns 0 on success, the value of sr_oom() when the descriptor slot
 * cannot be reserved, and -1 when copying the min/max keys fails.
 */
int sd_indexadd(sdindex *i, sr *r, sdbuild *build, uint64_t offset)
{
	/* make sure there is room for one more descriptor */
	int status = ss_bufensure(&i->i, r->a, sizeof(sdindexpage));
	if (ssunlikely(status == -1))
		return sr_oom(r->e);

	sdpageheader *page = sd_buildheader(build);

	/* both sizes are stored including the page header itself */
	int page_size        = ph_size_placeholder(page);
	int page_size_origin = page->sizeorigin + sizeof(sdpageheader);

	/* fill in the descriptor at the current write position */
	sdindexpage *entry = (sdindexpage*)i->i.p;
	entry->sizemin     = 0;
	entry->sizemax     = 0;
	entry->size        = page_size;
	entry->sizeorigin  = page_size_origin;
	entry->lsnmin      = page->lsnmin;
	entry->lsnmax      = page->lsnmax;
	entry->offset      = offset;
	entry->offsetindex = ss_bufused(&i->v);

	/* store min/max keys for non-empty pages */
	if (ssunlikely(page->count > 0)) {
		int field;

		/* step over the two leading leb128 fields of each key */
		char *key_min = sd_buildminkey(build);
		for (field = 0; field < 2; field++)
			key_min += ss_leb128skip(key_min);
		char *key_max = sd_buildmaxkey(build);
		for (field = 0; field < 2; field++)
			key_max += ss_leb128skip(key_max);

		if (r->fmt_storage == SF_SRAW)
			status = sd_indexadd_raw(i, r, entry, key_min, key_max);
		else if (r->fmt_storage == SF_SKEYVALUE)
			status = sd_indexadd_keyvalue(i, r, build, entry, key_min, key_max);

		if (ssunlikely(status == -1))
			return -1;
	}

	/* accumulate page statistics in the index header */
	sdindexheader *hdr = sd_indexheader(i);
	hdr->count++;
	hdr->size        += sizeof(sdindexpage) + entry->sizemin + entry->sizemax;
	hdr->keys        += page->count;
	hdr->total       += page_size;
	hdr->totalorigin += page_size_origin;
	hdr->dupkeys     += page->countdup;

	/* widen the tracked extremes where this page exceeds them */
	if (build->vmax > hdr->sizevmax)
		hdr->sizevmax = build->vmax;
	if (page->lsnmin < hdr->lsnmin)
		hdr->lsnmin = page->lsnmin;
	if (page->lsnmax > hdr->lsnmax)
		hdr->lsnmax = page->lsnmax;
	if (page->lsnmindup < hdr->dupmin)
		hdr->dupmin = page->lsnmindup;

	/* commit the descriptor slot */
	ss_bufadvance(&i->i, sizeof(sdindexpage));
	return 0;
}
Example #19
0
/*
 * Split the merge stream `i` into a sequence of new nodes.
 *
 * For every sd_merge() step that returns > 0 a node is allocated with a
 * fresh id (parent id taken from `parent`), its file is created, the merged
 * pages and then the index are written to it, the file is optionally
 * mapped (scheme.mmap), and the node pointer is appended to `result`.
 * The node finally receives the merge index by struct copy.
 *
 * Returns 0 once sd_merge() reports the end of the stream, and -1 on any
 * failure after releasing the node still being built (if any), the merge
 * state and the nodes already stored in `result`.
 */
static inline int
si_split(si *index, sdc *c, ssbuf *result,
         sinode   *parent,
         ssiter   *i,
         uint64_t  size_node,
         uint64_t  size_stream,
         uint32_t  stream,
         uint64_t  vlsn)
{
	sr *r = &index->r;
	uint32_t timestamp = ss_timestamp();
	int rc;
	sdmergeconf mergeconf = {
		.stream              = stream,
		.size_stream         = size_stream,
		.size_node           = size_node,
		.size_page           = index->scheme.compaction.node_page_size,
		.checksum            = index->scheme.compaction.node_page_checksum,
		.expire              = index->scheme.expire,
		.timestamp           = timestamp,
		.compression         = index->scheme.compression,
		.compression_if      = index->scheme.compression_if,
		.direct_io           = index->scheme.direct_io,
		.direct_io_page_size = index->scheme.direct_io_page_size,
		.vlsn                = vlsn
	};
	/* node under construction that has not been stored in result yet;
	 * NULL whenever there is nothing pending to release */
	sinode *n = NULL;
	sdmerge merge;
	rc = sd_mergeinit(&merge, r, i, &c->build, &c->build_index,
	                  &c->upsert, &mergeconf);
	if (ssunlikely(rc == -1))
		return -1;
	while ((rc = sd_merge(&merge)) > 0)
	{
		/* create new node */
		uint64_t id = sr_seq(index->r.seq, SR_NSNNEXT);
		n = si_nodenew(r, id, parent->id);
		if (ssunlikely(n == NULL))
			goto error;
		rc = si_nodecreate(n, r, &index->scheme);
		if (ssunlikely(rc == -1))
			goto error;

		/* write pages */
		uint64_t offset;
		offset = sd_iosize(&c->io, &n->file);
		while ((rc = sd_mergepage(&merge, offset)) == 1) {
			rc = sd_writepage(r, &n->file, &c->io, merge.build);
			if (ssunlikely(rc == -1))
				goto error;
			offset = sd_iosize(&c->io, &n->file);
		}
		if (ssunlikely(rc == -1))
			goto error;

		offset = sd_iosize(&c->io, &n->file);
		rc = sd_mergeend(&merge, offset);
		if (ssunlikely(rc == -1))
			goto error;

		/* write index */
		rc = sd_writeindex(r, &n->file, &c->io, &merge.index);
		if (ssunlikely(rc == -1))
			goto error;

		/* mmap mode */
		if (index->scheme.mmap) {
			rc = si_nodemap(n, r);
			if (ssunlikely(rc == -1))
				goto error;
		}

		/* add node to the list */
		rc = ss_bufadd(result, index->r.a, &n, sizeof(sinode*));
		if (ssunlikely(rc == -1)) {
			sr_oom_malfunction(index->r.e);
			goto error;
		}

		n->index = merge.index;

		/* The node is now reachable through result. Forget it here so
		 * that a later sd_merge() failure does not release it once via
		 * `n` and a second time via si_splitfree(result). */
		n = NULL;
	}
	if (ssunlikely(rc == -1))
		goto error;
	return 0;
error:
	/* only a node that never made it into result is released here */
	if (n)
		si_nodefree(n, r, 0);
	sd_mergefree(&merge);
	si_splitfree(result, r);
	return -1;
}

/*
 * Merge `stream` into a set of new nodes and swap them into the index in
 * place of `node`.
 *
 * Phases, in order:
 *   1. si_split() builds the new nodes into c->a (`result`).
 *   2. Under the index lock the old node is removed from the planner and,
 *      depending on how many nodes were produced, deleted, replaced by a
 *      single node, or replaced by several nodes.
 *   3. The new node files are optionally synced and renamed to their sealed
 *      name, the old node is freed (or queued on index->gc when it still
 *      has references), the new nodes are renamed to their complete name,
 *      and finally they are unlocked under the index lock.
 *
 * Returns 0 on success and -1 on failure.
 *
 * NOTE(review): failures after the commit phase return -1 without unlocking
 * the new nodes or releasing `result`; presumably the caller treats this as
 * a malfunction state - confirm.
 */
static int
si_merge(si *index, sdc *c, sinode *node,
         uint64_t vlsn,
         ssiter *stream,
         uint64_t size_stream,
         uint32_t n_stream)
{
	sr *r = &index->r;
	ssbuf *result = &c->a;
	ssiter i;

	/* begin compaction.
	 *
	 * Split merge stream into a number of
	 * a new nodes.
	 */
	int rc;
	rc = si_split(index, c, result,
	              node, stream,
	              index->scheme.compaction.node_size,
	              size_stream,
	              n_stream,
	              vlsn);
	if (ssunlikely(rc == -1))
		return -1;

	SS_INJECTION(r->i, SS_INJECTION_SI_COMPACTION_0,
	             si_splitfree(result, r);
	             sr_malfunction(r->e, "%s", "error injection");
	             return -1);

	/* mask removal of a single node as a
	 * single node update */
	/* result holds sinode pointers, so this is the number of new nodes */
	int count = ss_bufused(result) / sizeof(sinode*);
	int count_index;

	/* read the current node count of the index under its lock */
	si_lock(index);
	count_index = index->n;
	si_unlock(index);

	sinode *n;
	if (ssunlikely(count == 0 && count_index == 1))
	{
		/* the merge produced nothing and this is the only node in the
		 * index: create a bootstrap node so the "self update" case below
		 * is taken instead of "delete" */
		n = si_bootstrap(index, node->id);
		if (ssunlikely(n == NULL))
			return -1;
		rc = ss_bufadd(result, r->a, &n, sizeof(sinode*));
		if (ssunlikely(rc == -1)) {
			sr_oom_malfunction(r->e);
			si_nodefree(n, r, 1);
			return -1;
		}
		count++;
	}

	/* commit compaction changes */
	si_lock(index);
	/* in-memory index of the old node, handed over or redistributed below */
	svindex *j = si_nodeindex(node);
	si_plannerremove(&index->p, node);
	si_nodesplit(node);
	switch (count) {
	case 0: /* delete */
		si_remove(index, node);
		si_redistribute_index(index, r, c, node);
		break;
	case 1: /* self update */
		n = *(sinode**)result->s;
		/* the single new node takes over the old in-memory index by
		 * struct copy */
		n->i0 = *j;
		n->used = j->used;
		si_nodelock(n);
		si_replace(index, node, n);
		si_plannerupdate(&index->p, n);
		break;
	default: /* split */
		rc = si_redistribute(index, r, c, node, result);
		if (ssunlikely(rc == -1)) {
			si_unlock(index);
			si_splitfree(result, r);
			return -1;
		}
		/* the first new node replaces the old one in the index ... */
		ss_iterinit(ss_bufiterref, &i);
		ss_iteropen(ss_bufiterref, &i, result, sizeof(sinode*));
		n = ss_iterof(ss_bufiterref, &i);
		n->used = n->i0.used;
		si_nodelock(n);
		si_replace(index, node, n);
		si_plannerupdate(&index->p, n);
		/* ... and every following node is inserted as a new entry */
		for (ss_iternext(ss_bufiterref, &i); ss_iterhas(ss_bufiterref, &i);
		     ss_iternext(ss_bufiterref, &i)) {
			n = ss_iterof(ss_bufiterref, &i);
			n->used = n->i0.used;
			si_nodelock(n);
			si_insert(index, n);
			si_plannerupdate(&index->p, n);
		}
		break;
	}
	/* re-initialise the old node's index after it was copied or
	 * redistributed above */
	sv_indexinit(j);
	si_unlock(index);

	/* compaction completion */

	/* seal nodes */
	ss_iterinit(ss_bufiterref, &i);
	ss_iteropen(ss_bufiterref, &i, result, sizeof(sinode*));
	while (ss_iterhas(ss_bufiterref, &i))
	{
		n  = ss_iterof(ss_bufiterref, &i);
		/* sync the node file first when the scheme asks for it */
		if (index->scheme.sync) {
			rc = ss_filesync(&n->file);
			if (ssunlikely(rc == -1)) {
				sr_malfunction(r->e, "db file '%s' sync error: %s",
				               ss_pathof(&n->file.path),
				               strerror(errno));
				return -1;
			}
		}
		rc = si_noderename_seal(n, r, &index->scheme);
		if (ssunlikely(rc == -1)) {
			/* note: this releases the old node, not the new one */
			si_nodefree(node, r, 0);
			return -1;
		}
		SS_INJECTION(r->i, SS_INJECTION_SI_COMPACTION_3,
		             si_nodefree(node, r, 0);
		             sr_malfunction(r->e, "%s", "error injection");
		             return -1);
		ss_iternext(ss_bufiterref, &i);
	}

	SS_INJECTION(r->i, SS_INJECTION_SI_COMPACTION_1,
	             si_nodefree(node, r, 0);
	             sr_malfunction(r->e, "%s", "error injection");
	             return -1);

	/* gc node */
	uint16_t refs = si_noderefof(node);
	if (sslikely(refs == 0)) {
		rc = si_nodefree(node, r, 1);
		if (ssunlikely(rc == -1))
			return -1;
	} else {
		/* node concurrently being read, schedule for
		 * delayed removal */
		si_nodegc(node, r, &index->scheme);
		si_lock(index);
		ss_listappend(&index->gc, &node->gc);
		index->gc_count++;
		si_unlock(index);
	}

	SS_INJECTION(r->i, SS_INJECTION_SI_COMPACTION_2,
	             sr_malfunction(r->e, "%s", "error injection");
	             return -1);

	/* complete new nodes */
	ss_iterinit(ss_bufiterref, &i);
	ss_iteropen(ss_bufiterref, &i, result, sizeof(sinode*));
	while (ss_iterhas(ss_bufiterref, &i))
	{
		n = ss_iterof(ss_bufiterref, &i);
		rc = si_noderename_complete(n, r, &index->scheme);
		if (ssunlikely(rc == -1))
			return -1;
		SS_INJECTION(r->i, SS_INJECTION_SI_COMPACTION_4,
		             sr_malfunction(r->e, "%s", "error injection");
		             return -1);
		ss_iternext(ss_bufiterref, &i);
	}

	/* unlock */
	/* release the node locks taken during the commit phase */
	si_lock(index);
	ss_iterinit(ss_bufiterref, &i);
	ss_iteropen(ss_bufiterref, &i, result, sizeof(sinode*));
	while (ss_iterhas(ss_bufiterref, &i))
	{
		n = ss_iterof(ss_bufiterref, &i);
		si_nodeunlock(n);
		ss_iternext(ss_bufiterref, &i);
	}
	si_unlock(index);
	return 0;
}
Example #20
0
	ssbuf compressed;
	ss_bufinit(&compressed);

	ssfilter f;
	t( ss_filterinit(&f, &ss_zstdfilter, &a, SS_FINPUT) == 0 );
	t( ss_filterstart(&f, &compressed) == 0 );
	t( ss_filternext(&f, &compressed, text, sizeof(text) - 1) == 0 );
	t( ss_filtercomplete(&f, &compressed) == 0 );
	t( ss_filterfree(&f) == 0 );

	ssbuf decompressed;
	ss_bufinit(&decompressed);
	t( ss_bufensure(&decompressed, &a, sizeof(text)) == 0 );

	t( ss_filterinit(&f, &ss_zstdfilter, &a, SS_FOUTPUT) == 0 );
	t( ss_filternext(&f, &decompressed, compressed.s, ss_bufused(&compressed)) == 0 );
	t( ss_filterfree(&f) == 0 );

	t( memcmp(text, decompressed.s, sizeof(text) - 1) == 0 );

	ss_buffree(&compressed, &a);
	ss_buffree(&decompressed, &a);
}

/*
 * Build the "sszstdfilter" test group and register its single
 * compress/decompress test in it.
 */
stgroup *sszstdfilter_group(void)
{
	stgroup *zstd_tests = st_group("sszstdfilter");

	st_groupadd(zstd_tests,
	            st_test("compress_decompress",
	                    sszstdfilter_compress_decompress));
	return zstd_tests;
}
Example #21
0
/*
 * Serialise the index scheme `s` and write it to "<s->path>/scheme".
 *
 * The record is built in an sdscheme: version, storage version, name, the
 * serialised key scheme, node size, page size, page checksum flag,
 * compression filter name and expire value are added in that order, the
 * record is committed and then written out.
 *
 * Returns the result of sd_schemewrite() on the success path, and -1 if
 * any earlier step fails. Once sd_schemebegin() has succeeded, every
 * failure goes through the `error` label so the scheme record is released.
 */
int si_schemedeploy(sischeme *s, sr *r)
{
	sdscheme c;
	sd_schemeinit(&c);
	int rc;
	rc = sd_schemebegin(&c, r);
	if (ssunlikely(rc == -1))
		return -1;
	/* scratch buffer for the serialised key scheme */
	ssbuf buf;
	ss_bufinit(&buf);
	rc = sd_schemeadd(&c, r, SI_SCHEME_VERSION, SS_STRING, &s->version,
	                  sizeof(s->version));
	if (ssunlikely(rc == -1))
		goto error;
	rc = sd_schemeadd(&c, r, SI_SCHEME_VERSION_STORAGE, SS_STRING,
	                  &s->version_storage, sizeof(s->version_storage));
	if (ssunlikely(rc == -1))
		goto error;
	/* the name is stored including its terminating NUL */
	rc = sd_schemeadd(&c, r, SI_SCHEME_NAME, SS_STRING, s->name,
	                  strlen(s->name) + 1);
	if (ssunlikely(rc == -1))
		goto error;
	rc = sf_schemesave(&s->scheme, r->a, &buf);
	if (ssunlikely(rc == -1))
		goto error;
	rc = sd_schemeadd(&c, r, SI_SCHEME_SCHEME, SS_STRING, buf.s,
	                  ss_bufused(&buf));
	if (ssunlikely(rc == -1))
		goto error;
	/* the scratch buffer is no longer needed once its content was added */
	ss_buffree(&buf, r->a);
	rc = sd_schemeadd(&c, r, SI_SCHEME_NODE_SIZE, SS_U64,
	                  &s->compaction.node_size,
	                  sizeof(s->compaction.node_size));
	if (ssunlikely(rc == -1))
		goto error;
	rc = sd_schemeadd(&c, r, SI_SCHEME_NODE_PAGE_SIZE, SS_U32,
	                  &s->compaction.node_page_size,
	                  sizeof(s->compaction.node_page_size));
	if (ssunlikely(rc == -1))
		goto error;
	rc = sd_schemeadd(&c, r, SI_SCHEME_NODE_PAGE_CHECKSUM, SS_U32,
	                  &s->compaction.node_page_checksum,
	                  sizeof(s->compaction.node_page_checksum));
	if (ssunlikely(rc == -1))
		goto error;
	rc = sd_schemeadd(&c, r, SI_SCHEME_COMPRESSION, SS_STRING,
	                  s->compression_if->name,
	                  strlen(s->compression_if->name) + 1);
	if (ssunlikely(rc == -1))
		goto error;
	rc = sd_schemeadd(&c, r, SI_SCHEME_EXPIRE, SS_U32,
	                  &s->expire, sizeof(s->expire));
	if (ssunlikely(rc == -1))
		goto error;
	rc = sd_schemecommit(&c, r);
	/* release the scheme record on commit failure as well, like every
	 * other failure after sd_schemebegin() */
	if (ssunlikely(rc == -1))
		goto error;
	char path[PATH_MAX];
	snprintf(path, sizeof(path), "%s/scheme", s->path);
	rc = sd_schemewrite(&c, r, path, 0);
	sd_schemefree(&c, r);
	return rc;
error:
	/* buf may already have been released above; this path was reachable
	 * in that state before, so ss_buffree() is presumably safe to repeat -
	 * confirm */
	ss_buffree(&buf, r->a);
	sd_schemefree(&c, r);
	return -1;
}
Example #22
0
/*
 * Serialise the index scheme `s` and write it to "<s->path>/scheme".
 *
 * The record is built in an sdscheme: name, the serialised key scheme,
 * format, storage format, node size, page size, page checksum flag, sync
 * flag, both compression filter names, compression_key, amqf, cache mode
 * and expire value are added in that order, the record is committed and
 * then written out.
 *
 * Returns the result of sd_schemewrite() on the success path, and -1 if
 * any earlier step fails. Once sd_schemebegin() has succeeded, every
 * failure goes through the `error` label so the scheme record is released.
 */
int si_schemedeploy(sischeme *s, sr *r)
{
	sdscheme c;
	sd_schemeinit(&c);
	int rc;
	rc = sd_schemebegin(&c, r);
	if (ssunlikely(rc == -1))
		return -1;
	/* scratch buffer for the serialised key scheme */
	ssbuf buf;
	ss_bufinit(&buf);
	/* the name is stored including its terminating NUL */
	rc = sd_schemeadd(&c, r, SI_SCHEME_NAME, SS_STRING, s->name,
	                  strlen(s->name) + 1);
	if (ssunlikely(rc == -1))
		goto error;
	rc = sr_schemesave(&s->scheme, r->a, &buf);
	if (ssunlikely(rc == -1))
		goto error;
	rc = sd_schemeadd(&c, r, SI_SCHEME_SCHEME, SS_STRING, buf.s,
	                  ss_bufused(&buf));
	if (ssunlikely(rc == -1))
		goto error;
	/* the scratch buffer is no longer needed once its content was added */
	ss_buffree(&buf, r->a);
	/* the format fields are copied into a uint32_t so they are stored
	 * with the SS_U32 width */
	uint32_t v = s->fmt;
	rc = sd_schemeadd(&c, r, SI_SCHEME_FORMAT, SS_U32, &v, sizeof(v));
	if (ssunlikely(rc == -1))
		goto error;
	v = s->fmt_storage;
	rc = sd_schemeadd(&c, r, SI_SCHEME_FORMAT_STORAGE, SS_U32, &v, sizeof(v));
	if (ssunlikely(rc == -1))
		goto error;
	rc = sd_schemeadd(&c, r, SI_SCHEME_NODE_SIZE, SS_U64,
	                  &s->node_size,
	                  sizeof(s->node_size));
	if (ssunlikely(rc == -1))
		goto error;
	rc = sd_schemeadd(&c, r, SI_SCHEME_NODE_PAGE_SIZE, SS_U32,
	                  &s->node_page_size,
	                  sizeof(s->node_page_size));
	if (ssunlikely(rc == -1))
		goto error;
	rc = sd_schemeadd(&c, r, SI_SCHEME_NODE_PAGE_CHECKSUM, SS_U32,
	                  &s->node_page_checksum,
	                  sizeof(s->node_page_checksum));
	if (ssunlikely(rc == -1))
		goto error;
	rc = sd_schemeadd(&c, r, SI_SCHEME_SYNC, SS_U32,
	                  &s->sync,
	                  sizeof(s->sync));
	if (ssunlikely(rc == -1))
		goto error;
	rc = sd_schemeadd(&c, r, SI_SCHEME_COMPRESSION, SS_STRING,
	                  s->compression_if->name,
	                  strlen(s->compression_if->name) + 1);
	if (ssunlikely(rc == -1))
		goto error;
	rc = sd_schemeadd(&c, r, SI_SCHEME_COMPRESSION_BRANCH, SS_STRING,
	                  s->compression_branch_if->name,
	                  strlen(s->compression_branch_if->name) + 1);
	if (ssunlikely(rc == -1))
		goto error;
	rc = sd_schemeadd(&c, r, SI_SCHEME_COMPRESSION_KEY, SS_U32,
	                  &s->compression_key,
	                  sizeof(s->compression_key));
	if (ssunlikely(rc == -1))
		goto error;
	rc = sd_schemeadd(&c, r, SI_SCHEME_AMQF, SS_U32,
	                  &s->amqf, sizeof(s->amqf));
	if (ssunlikely(rc == -1))
		goto error;
	rc = sd_schemeadd(&c, r, SI_SCHEME_CACHE_MODE, SS_U32,
	                  &s->cache_mode, sizeof(s->cache_mode));
	if (ssunlikely(rc == -1))
		goto error;
	rc = sd_schemeadd(&c, r, SI_SCHEME_EXPIRE, SS_U32,
	                  &s->expire, sizeof(s->expire));
	if (ssunlikely(rc == -1))
		goto error;
	rc = sd_schemecommit(&c, r);
	/* release the scheme record on commit failure as well, like every
	 * other failure after sd_schemebegin() */
	if (ssunlikely(rc == -1))
		goto error;
	char path[PATH_MAX];
	snprintf(path, sizeof(path), "%s/scheme", s->path);
	rc = sd_schemewrite(&c, r, path, 0);
	sd_schemefree(&c, r);
	return rc;
error:
	/* buf may already have been released above; this path was reachable
	 * in that state before, so ss_buffree() is presumably safe to repeat -
	 * confirm */
	ss_buffree(&buf, r->a);
	sd_schemefree(&c, r);
	return -1;
}