Пример #1
0
static inline int
si_getresult(siread *q, sv *v, int compare)
{
	int rc;
	if (compare) {
		rc = sr_compare(q->r->scheme, sv_pointer(v), sv_size(v),
		                q->key, q->keysize);
		if (ssunlikely(rc != 0))
			return 0;
	}
	if (q->prefix) {
		rc = sr_compareprefix(q->r->scheme,
		                      q->prefix,
		                      q->prefixsize,
		                      sv_pointer(v), sv_size(v));
		if (ssunlikely(! rc))
			return 0;
	}
	if (ssunlikely(q->has))
		return sv_lsn(v) > q->vlsn;
	if (ssunlikely(sv_is(v, SVDELETE)))
		return 2;
	rc = si_readdup(q, v);
	if (ssunlikely(rc == -1))
		return -1;
	return 1;
}
Пример #2
0
int sd_buildadd(sdbuild *b, sr *r, sv *v, uint32_t flags)
{
	/* prepare metadata reference */
	int rc = sr_bufensure(&b->k, r->a, sizeof(sdv));
	if (srunlikely(rc == -1))
		return sr_error(r->e, "%s", "memory allocation failed");
	sdpageheader *h = sd_buildheader(b);
	sdv *sv = (sdv*)b->k.p;
	sv->lsn    = sv_lsn(v);
	sv->flags  = sv_flags(v) | flags;
	sv->size   = sv_size(v);
	sv->offset = sr_bufused(&b->v) - sd_buildref(b)->v;
	/* copy object */
	rc = sr_bufensure(&b->v, r->a, sv->size);
	if (srunlikely(rc == -1))
		return sr_error(r->e, "%s", "memory allocation failed");
	memcpy(b->v.p, sv_pointer(v), sv->size);
	sr_bufadvance(&b->v, sv->size);
	sr_bufadvance(&b->k, sizeof(sdv));
	/* update page header */
	h->count++;
	h->size +=  sv->size + sizeof(sdv);
	if (sv->lsn > h->lsnmax)
		h->lsnmax = sv->lsn;
	if (sv->lsn < h->lsnmin)
		h->lsnmin = sv->lsn;
	if (sv->flags & SVDUP) {
		h->countdup++;
		if (sv->lsn < h->lsnmindup)
			h->lsnmindup = sv->lsn;
	}
	return 0;
}
Пример #3
0
int sx_get(sx *t, sxindex *index, sv *key, sv *result)
{
	sxmanager *m = t->manager;
	ssrbnode *n = NULL;
	int rc = sx_match(&index->i, index->scheme,
	                  sv_pointer(key),
	                  sv_size(key), &n);
	if (! (rc == 0 && n)) {
		rc = 0;
		goto done;
	}
	sxv *head = sscast(n, sxv, node);
	sxv *v = sx_vmatch(head, t->id);
	if (v == NULL) {
		rc = 0;
		goto done;
	}
	if (ssunlikely((v->v->flags & SVDELETE) > 0)) {
		rc = 2;
		goto done;
	}
	sv vv;
	sv_init(&vv, &sv_vif, v->v, NULL);
	svv *ret = sv_vdup(m->a, &vv);
	if (ssunlikely(ret == NULL)) {
		rc = sr_oom(index->r->e);
	} else {
		sv_init(result, &sv_vif, ret, NULL);
		rc = 1;
	}
done:
	return rc;
}
Пример #4
0
sxstate sx_setstmt(sxmanager *m, sxindex *index, sv *v)
{
	sr_seq(m->seq, SR_TSNNEXT);
	ssrbnode *n = NULL;
	int rc = sx_match(&index->i, index->scheme, sv_pointer(v), sv_size(v), &n);
	if (rc == 0 && n)
		return SXLOCK;
	return SXCOMMIT;
}
Пример #5
0
static void
svv_kv(stc *cx)
{
	sra a;
	sr_aopen(&a, &sr_stda);

	srkey cmp;
	sr_keyinit(&cmp);
	srkeypart *part = sr_keyadd(&cmp, &a);
	t( sr_keypart_setname(part, &a, "key") == 0 );
	t( sr_keypart_set(part, &a, "u32") == 0 );
	sr r;
	sr_init(&r, NULL, &a, NULL, SR_FKV, &cmp, NULL, NULL, NULL);

	uint32_t key = 123;
	uint32_t value = 321;

	srformatv pv;
	pv.key = (char*)&key;
	pv.r.size = sizeof(key);
	pv.r.offset = 0;

	svv *vv = sv_vbuild(&r, &pv, 1, (char*)&value, sizeof(value));
	t( vv != NULL );
	vv->flags = SVSET;
	vv->lsn = 10;
	sv v;
	sv_init(&v, &sv_vif, vv, NULL);

	t( sv_flags(&v) == SVSET );
	t( sv_lsn(&v) == 10 );
	sv_lsnset(&v, 8);
	t( sv_lsn(&v) == 8 );

	t( *(uint32_t*)sr_formatkey(sv_pointer(&v), 0) == key );
	t( sr_formatkey_size(sv_pointer(&v), 0) == sizeof(key) );

	t( *(uint32_t*)sr_formatvalue(SR_FKV, &cmp, sv_pointer(&v)) == value );
	t( sr_formatvalue_size(SR_FKV, &cmp, sv_pointer(&v), sv_size(&v) ) == sizeof(value) );

	sv_vfree(&a, vv);
	sr_keyfree(&cmp, &a);
}
Пример #6
0
static int
si_readcommited_branch(sr *r, sibranch *b, sv *v)
{
	ssiter i;
	ss_iterinit(sd_indexiter, &i);
	ss_iteropen(sd_indexiter, &i, r, &b->index, SS_GTE,
	            sv_pointer(v), sv_size(v));
	sdindexpage *page = ss_iterof(sd_indexiter, &i);
	if (page == NULL)
		return 0;
	return page->lsnmax >= sv_lsn(v);
}
Пример #7
0
static inline int
sd_buildadd_raw(sdbuild *b, sr *r, sv *v)
{
	uint64_t lsn = sv_lsn(v);
	uint32_t size = sv_size(v);
	uint32_t sizemeta = ss_leb128size(size) + ss_leb128size(lsn);
	int rc = ss_bufensure(&b->v, r->a, sizemeta + size);
	if (ssunlikely(rc == -1))
		return sr_oom(r->e);
	ss_bufadvance(&b->v, ss_leb128write(b->v.p, size));
	ss_bufadvance(&b->v, ss_leb128write(b->v.p, lsn));
	memcpy(b->v.p, sv_pointer(v), size);
	ss_bufadvance(&b->v, size);
	return 0;
}
Пример #8
0
int se_reqread(sereq *r)
{
	sereqarg *arg = &r->arg;
	sedb *db = (sedb*)r->db;
	uint32_t keysize;
	void *key;
	if (sslikely(arg->v.v)) {
		keysize = sv_size(&arg->v);
		key = sv_pointer(&arg->v);
	} else {
		keysize = 0;
		key = NULL;
	}
	char *prefix;
	uint32_t prefixsize;
	if (arg->vprefix.v) {
		void *vptr = sv_vpointer(arg->vprefix.v);
		prefix = sf_key(vptr, 0);
		prefixsize = sf_keysize(vptr, 0);
	} else {
		prefix = NULL;
		prefixsize = 0;
	}
	if (sslikely(arg->vlsn_generate))
		arg->vlsn = sr_seq(db->r.seq, SR_LSN);
	sitx x;
	si_begin(&x, &db->index, 1);
	siread q;
	si_readopen(&q, &x, arg->cache,
	            arg->order,
	            arg->vlsn,
	            prefix,
	            prefixsize, key, keysize);
	if (arg->update)
		si_readupdate(&q, &arg->vup, arg->update_eq);
	if (arg->cache_only)
		si_readcache_only(&q);
	if (arg->has)
		si_readhas(&q);
	r->rc = si_read(&q);
	r->read_disk  = q.read_disk;
	r->read_cache = q.read_cache;
	r->v = q.result.v;
	si_readclose(&q);
	si_commit(&x);
	return r->rc;
}
Пример #9
0
int si_readcommited(si *index, sr *r, sv *v)
{
	ssiter i;
	ss_iterinit(si_iter, &i);
	ss_iteropen(si_iter, &i, r, index, SS_GTE,
	            sv_pointer(v), sv_size(v));
	sinode *node;
	node = ss_iterof(si_iter, &i);
	assert(node != NULL);
	sibranch *b = node->branch;
	int rc;
	while (b) {
		rc = si_readcommited_branch(r, b, v);
		if (rc)
			return 1;
		b = b->next;
	}
	rc = si_readcommited_branch(r, &node->self, v);
	return rc;
}
Пример #10
0
int sx_get(sx *x, sxindex *index, sv *key, sv *result)
{
	sxmanager *m = x->manager;
	ssrbnode *n = NULL;
	int rc;
	rc = sx_match(&index->i, index->r->scheme,
	              sv_pointer(key),
	              sv_size(key),
	              &n);
	if (! (rc == 0 && n))
		goto add;
	sxv *head = sscast(n, sxv, node);
	sxv *v = sx_vmatch(head, x->id);
	if (v == NULL)
		goto add;
	if (ssunlikely((v->v->flags & SVGET) > 0))
		return 0;
	if (ssunlikely((v->v->flags & SVDELETE) > 0))
		return 2;
	sv vv;
	sv_init(&vv, &sv_vif, v->v, NULL);
	svv *ret = sv_vdup(m->r, &vv);
	if (ssunlikely(ret == NULL)) {
		rc = sr_oom(m->r->e);
	} else {
		sv_init(result, &sv_vif, ret, NULL);
		rc = 1;
	}
	return rc;
add:
	/* track a start of the latest read sequence in the
	 * transactional log */
	if (x->log_read == -1)
		x->log_read = sv_logcount(&x->log);
	rc = sx_set(x, index, key->v);
	if (ssunlikely(rc == -1))
		return -1;
	sv_vref((svv*)key->v);
	return 0;
}
Пример #11
0
static inline int
si_rangebranch(siread *q, sinode *n, sibranch *b, svmerge *m)
{
	sicachebranch *c = si_cachefollow(q->cache);
	assert(c->branch == b);
	/* iterate cache */
	if (ss_iterhas(sd_read, &c->i)) {
		svmergesrc *s = sv_mergeadd(m, &c->i);
		si_readstat(q, 1, n, 1);
		s->ptr = c;
		return 1;
	}
	if (c->open) {
		return 1;
	}
	if (q->cache_only) {
		return 2;
	}
	c->open = 1;
	/* choose compression type */
	int compression;
	ssfilterif *compression_if;
	if (! si_branchis_root(b)) {
		compression    = q->index->scheme->compression_branch;
		compression_if = q->index->scheme->compression_branch_if;
	} else {
		compression    = q->index->scheme->compression;
		compression_if = q->index->scheme->compression_if;
	}
	sdreadarg arg = {
		.index           = &b->index,
		.buf             = &c->buf_a,
		.buf_xf          = &c->buf_b,
		.buf_read        = &q->index->readbuf,
		.index_iter      = &c->index_iter,
		.page_iter       = &c->page_iter,
		.use_memory      = n->in_memory,
		.use_mmap        = q->index->scheme->mmap,
		.use_mmap_copy   = 1,
		.use_compression = compression,
		.compression_if  = compression_if,
		.has             = 0,
		.has_vlsn        = 0,
		.o               = q->order,
		.memory          = &b->copy,
		.mmap            = &n->map,
		.file            = &n->file,
		.r               = q->r
	};
	ss_iterinit(sd_read, &c->i);
	int rc = ss_iteropen(sd_read, &c->i, &arg, q->key, q->keysize);
	int reads = sd_read_stat(&c->i);
	si_readstat(q, 0, n, reads);
	if (ssunlikely(rc == -1))
		return -1;
	if (ssunlikely(! ss_iterhas(sd_read, &c->i)))
		return 0;
	svmergesrc *s = sv_mergeadd(m, &c->i);
	s->ptr = c;
	return 1;
}

static inline int
si_range(siread *q)
{
	assert(q->has == 0);

	ssiter i;
	ss_iterinit(si_iter, &i);
	ss_iteropen(si_iter, &i, q->r, q->index, q->order, q->key, q->keysize);
	sinode *node;
next_node:
	node = ss_iterof(si_iter, &i);
	if (ssunlikely(node == NULL))
		return 0;
	si_txtrack(q->x, node);

	/* prepare sources */
	svmerge *m = &q->merge;
	int count = node->branch_count + 2 + 1;
	int rc = sv_mergeprepare(m, q->r, count);
	if (ssunlikely(rc == -1)) {
		sr_errorreset(q->r->e);
		return -1;
	}

	/* external source (upsert) */
	svmergesrc *s;
	sv upbuf_reserve;
	ssbuf upbuf;
	if (ssunlikely(q->upsert_v && q->upsert_v->v)) {
		ss_bufinit_reserve(&upbuf, &upbuf_reserve, sizeof(upbuf_reserve));
		ss_bufadd(&upbuf, NULL, (void*)&q->upsert_v, sizeof(sv*));
		s = sv_mergeadd(m, NULL);
		ss_iterinit(ss_bufiterref, &s->src);
		ss_iteropen(ss_bufiterref, &s->src, &upbuf, sizeof(sv*));
	}

	/* in-memory indexes */
	svindex *second;
	svindex *first = si_nodeindex_priority(node, &second);
	if (first->count) {
		s = sv_mergeadd(m, NULL);
		ss_iterinit(sv_indexiter, &s->src);
		ss_iteropen(sv_indexiter, &s->src, q->r, first, q->order,
		            q->key, q->keysize);
	}
	if (ssunlikely(second && second->count)) {
		s = sv_mergeadd(m, NULL);
		ss_iterinit(sv_indexiter, &s->src);
		ss_iteropen(sv_indexiter, &s->src, q->r, second, q->order,
		            q->key, q->keysize);
	}

	/* cache and branches */
	rc = si_cachevalidate(q->cache, node);
	if (ssunlikely(rc == -1)) {
		sr_oom(q->r->e);
		return -1;
	}
	sibranch *b = node->branch;
	while (b) {
		rc = si_rangebranch(q, node, b, m);
		if (ssunlikely(rc == -1 || rc == 2))
			return rc;
		b = b->next;
	}

	/* merge and filter data stream */
	ssiter j;
	ss_iterinit(sv_mergeiter, &j);
	ss_iteropen(sv_mergeiter, &j, q->r, m, q->order);
	ssiter k;
	ss_iterinit(sv_readiter, &k);
	ss_iteropen(sv_readiter, &k, q->r, &j, &q->index->u, q->vlsn, 0);
	sv *v = ss_iterof(sv_readiter, &k);
	if (ssunlikely(v == NULL)) {
		sv_mergereset(&q->merge);
		ss_iternext(si_iter, &i);
		goto next_node;
	}

	rc = 1;
	/* convert upsert search to SS_EQ */
	if (q->upsert_eq) {
		rc = sr_compare(q->r->scheme, sv_pointer(v), sv_size(v),
		                q->key, q->keysize);
		rc = rc == 0;
	}
	/* do prefix search */
	if (q->prefix && rc) {
		rc = sr_compareprefix(q->r->scheme, q->prefix, q->prefixsize,
		                      sv_pointer(v),
		                      sv_size(v));
	}
	if (sslikely(rc == 1)) {
		if (ssunlikely(si_readdup(q, v) == -1))
			return -1;
	}

	/* skip a possible duplicates from data sources */
	ss_iternext(sv_readiter, &k);
	return rc;
}
Пример #12
0
static inline void
si_qrangebranch(siquery *q, sinode *n, sibranch *b, svmerge *m)
{
	sicachebranch *cb = si_cachefollow(q->cache);
	assert(cb->branch == b);
	/* iterate cache */
	if (ss_iterhas(si_read, &cb->i)) {
		svmergesrc *s = sv_mergeadd(m, &cb->i);
		q->index->read_cache++;
		s->ptr = cb;
		return;
	}
	if (cb->open) {
		return;
	}
	cb->open = 1;
	sireadarg arg = {
		.scheme     = q->index->scheme,
		.index      = q->index,
		.n          = n,
		.b          = b,
		.buf        = &cb->buf_a,
		.buf_xf     = &cb->buf_b,
		.buf_read   = &q->index->readbuf,
		.index_iter = &cb->index_iter,
		.page_iter  = &cb->page_iter,
		.vlsn       = q->vlsn,
		.has        = 0,
		.mmap_copy  = 1,
		.o          = q->order,
		.r          = q->r
	};
	ss_iterinit(si_read, &cb->i);
	int rc = ss_iteropen(si_read, &cb->i, &arg, q->key, q->keysize);
	if (ssunlikely(rc == -1))
		return;
	if (ssunlikely(! ss_iterhas(si_read, &cb->i)))
		return;
	svmergesrc *s = sv_mergeadd(m, &cb->i);
	s->ptr = cb;
}

static inline int
si_qrange(siquery *q)
{
	ssiter i;
	ss_iterinit(si_iter, &i);
	ss_iteropen(si_iter, &i, q->r, q->index, q->order, q->key, q->keysize);
	sinode *node;
next_node:
	node = ss_iterof(si_iter, &i);
	if (ssunlikely(node == NULL))
		return 0;

	/* prepare sources */
	svmerge *m = &q->merge;
	int count = node->branch_count + 2 + 1;
	int rc = sv_mergeprepare(m, q->r, count);
	if (ssunlikely(rc == -1)) {
		sr_errorreset(q->r->e);
		return -1;
	}

	/* external source (update) */
	svmergesrc *s;
	sv upbuf_reserve;
	ssbuf upbuf;
	if (ssunlikely(q->update_v && q->update_v->v)) {
		ss_bufinit_reserve(&upbuf, &upbuf_reserve, sizeof(upbuf_reserve));
		ss_bufadd(&upbuf, NULL, (void*)&q->update_v, sizeof(sv*));
		s = sv_mergeadd(m, NULL);
		ss_iterinit(ss_bufiterref, &s->src);
		ss_iteropen(ss_bufiterref, &s->src, &upbuf, sizeof(sv*));
	}

	/* in-memory indexes */
	svindex *second;
	svindex *first = si_nodeindex_priority(node, &second);
	if (first->count) {
		s = sv_mergeadd(m, NULL);
		ss_iterinit(sv_indexiter, &s->src);
		ss_iteropen(sv_indexiter, &s->src, q->r, first, q->order,
		            q->key, q->keysize);
	}
	if (ssunlikely(second && second->count)) {
		s = sv_mergeadd(m, NULL);
		ss_iterinit(sv_indexiter, &s->src);
		ss_iteropen(sv_indexiter, &s->src, q->r, second, q->order,
		            q->key, q->keysize);
	}

	/* cache and branches */
	rc = si_cachevalidate(q->cache, node);
	if (ssunlikely(rc == -1)) {
		sr_oom(q->r->e);
		return -1;
	}
	sibranch *b = node->branch;
	while (b) {
		si_qrangebranch(q, node, b, m);
		b = b->next;
	}

	/* merge and filter data stream */
	ssiter j;
	ss_iterinit(sv_mergeiter, &j);
	ss_iteropen(sv_mergeiter, &j, q->r, m, q->order);
	ssiter k;
	ss_iterinit(sv_readiter, &k);
	ss_iteropen(sv_readiter, &k, q->r, &j, &q->index->u, q->vlsn, 0);
	sv *v = ss_iterof(sv_readiter, &k);
	if (ssunlikely(v == NULL)) {
		sv_mergereset(&q->merge);
		ss_iternext(si_iter, &i);
		goto next_node;
	}

	rc = 1;
	/* convert update search to SS_EQ */
	if (q->update_eq) {
		rc = sr_compare(q->r->scheme, sv_pointer(v), sv_size(v),
		                q->key, q->keysize);
		rc = rc == 0;
	}
	/* do prefix search */
	if (q->prefix && rc) {
		rc = sr_compareprefix(q->r->scheme, q->prefix, q->prefixsize,
		                      sv_pointer(v),
		                      sv_size(v));
	}
	if (sslikely(rc == 1)) {
		if (ssunlikely(si_querydup(q, v) == -1))
			return -1;
	}

	/* skip a possible duplicates from data sources */
	ss_iternext(sv_readiter, &k);
	return rc;
}
Пример #13
0
so *se_read(sedb *db, sedocument *o, sx *x, uint64_t vlsn,
            sicache *cache)
{
	se *e = se_of(&db->o);
	uint64_t start  = ss_utime();

	/* prepare the key */
	int auto_close = o->created <= 1;
	int rc = se_document_createkey(o);
	if (ssunlikely(rc == -1))
		goto error;
	rc = se_document_validate_ro(o, &db->o);
	if (ssunlikely(rc == -1))
		goto error;
	if (ssunlikely(! se_active(e)))
		goto error;

	sv vup;
	sv_init(&vup, &sv_vif, NULL, NULL);

	sedocument *ret = NULL;

	/* concurrent */
	if (x && o->order == SS_EQ) {
		/* note: prefix is ignored during concurrent
		 * index search */
		int rc = sx_get(x, &db->coindex, &o->v, &vup);
		if (ssunlikely(rc == -1 || rc == 2 /* delete */))
			goto error;
		if (rc == 1 && !sv_is(&vup, SVUPSERT)) {
			ret = (sedocument*)se_document_new(e, &db->o, &vup);
			if (sslikely(ret)) {
				ret->cold_only = o->cold_only;
				ret->created   = 1;
				ret->orderset  = 1;
				ret->flagset   = 1;
			} else {
				sv_vunref(db->r, vup.v);
			}
			if (auto_close)
				so_destroy(&o->o);
			return &ret->o;
		}
	} else {
		sx_get_autocommit(&e->xm, &db->coindex);
	}

	/* prepare read cache */
	int cachegc = 0;
	if (cache == NULL) {
		cachegc = 1;
		cache = si_cachepool_pop(&e->cachepool);
		if (ssunlikely(cache == NULL)) {
			if (vup.v)
				sv_vunref(db->r, vup.v);
			sr_oom(&e->error);
			goto error;
		}
	}

	sv_vref(o->v.v);

	/* do read */
	siread rq;
	si_readopen(&rq, db->index, cache, o->order,
	            vlsn,
	            sv_pointer(&o->v),
	            vup.v,
	            o->prefix_copy,
	            o->prefix_size,
	            o->cold_only,
	            0,
	            start);
	rc = si_read(&rq);
	si_readclose(&rq);

	/* prepare result */
	if (rc == 1) {
		ret = (sedocument*)se_readresult(e, &rq);
		if (ret)
			o->prefix_copy = NULL;
	}

	/* cleanup */
	if (o->v.v)
		sv_vunref(db->r, o->v.v);
	if (vup.v)
		sv_vunref(db->r, vup.v);
	if (ret == NULL && rq.result.v)
		sv_vunref(db->r, rq.result.v);
	if (cachegc && cache)
		si_cachepool_push(cache);

	if (auto_close)
		so_destroy(&o->o);
	return &ret->o;
error:
	if (auto_close)
		so_destroy(&o->o);
	return NULL;
}