/*
 * Classify a candidate version `v` for the read request `q`.
 *
 * Returns:
 *    0  `compare` was set and the key differs from q->key, or a prefix
 *       is set and sr_compareprefix() reported no match;
 *       in `has` mode: sv_lsn(v) is not greater than q->vlsn
 *    1  in `has` mode: sv_lsn(v) is greater than q->vlsn;
 *       otherwise: si_readdup() succeeded
 *    2  `v` carries the SVDELETE flag
 *   -1  si_readdup() failed
 */
static inline int
si_getresult(siread *q, sv *v, int compare)
{
    /* optional exact key check */
    if (compare) {
        int diff = sr_compare(q->r->scheme, sv_pointer(v), sv_size(v),
                              q->key, q->keysize);
        if (ssunlikely(diff != 0))
            return 0;
    }

    /* optional prefix check */
    if (q->prefix) {
        int matched = sr_compareprefix(q->r->scheme,
                                       q->prefix, q->prefixsize,
                                       sv_pointer(v), sv_size(v));
        if (ssunlikely(! matched))
            return 0;
    }

    /* `has` mode only reports an lsn comparison, nothing is copied */
    if (ssunlikely(q->has))
        return sv_lsn(v) > q->vlsn;

    if (ssunlikely(sv_is(v, SVDELETE)))
        return 2;

    return ssunlikely(si_readdup(q, v) == -1) ? -1 : 1;
}
/*
 * Look up `key` in the transactional index on behalf of transaction `t`.
 *
 * Returns:
 *    0  no node matched the key, or sx_vmatch() found no version for t->id
 *    2  the matched version has the SVDELETE flag set
 *    1  the matched version was duplicated with sv_vdup() and stored
 *       into `result`
 *   otherwise the value of sr_oom() when the duplication failed
 */
int sx_get(sx *t, sxindex *index, sv *key, sv *result)
{
    sxmanager *mgr = t->manager;
    ssrbnode *found = NULL;

    int rc = sx_match(&index->i, index->scheme,
                      sv_pointer(key), sv_size(key), &found);
    if (rc != 0 || found == NULL)
        return 0;

    /* pick the version that belongs to this transaction id */
    sxv *chain = sscast(found, sxv, node);
    sxv *mine = sx_vmatch(chain, t->id);
    if (mine == NULL)
        return 0;

    if (ssunlikely((mine->v->flags & SVDELETE) > 0))
        return 2;

    /* hand a private copy back to the caller */
    sv src;
    sv_init(&src, &sv_vif, mine->v, NULL);
    svv *copy = sv_vdup(mgr->a, &src);
    if (ssunlikely(copy == NULL))
        return sr_oom(index->r->e);

    sv_init(result, &sv_vif, copy, NULL);
    return 1;
}
/*
 * Append one version `v` to the page that is currently being built.
 *
 * An sdv metadata record is written at the tail of b->k and the object
 * bytes are copied to the tail of b->v; the page header counters and
 * lsn bounds are then updated. `flags` is OR-ed into the version flags.
 *
 * Returns 0 on success, or the value of sr_error() when either buffer
 * could not be grown.
 */
int sd_buildadd(sdbuild *b, sr *r, sv *v, uint32_t flags)
{
    /* reserve room for the metadata record */
    if (srunlikely(sr_bufensure(&b->k, r->a, sizeof(sdv)) == -1))
        return sr_error(r->e, "%s", "memory allocation failed");

    sdpageheader *page = sd_buildheader(b);
    sdv *ref = (sdv*)b->k.p;
    ref->lsn = sv_lsn(v);
    ref->flags = sv_flags(v) | flags;
    ref->size = sv_size(v);
    /* offset of the object relative to the start of this page's data */
    ref->offset = sr_bufused(&b->v) - sd_buildref(b)->v;

    /* copy the object itself */
    if (srunlikely(sr_bufensure(&b->v, r->a, ref->size) == -1))
        return sr_error(r->e, "%s", "memory allocation failed");
    memcpy(b->v.p, sv_pointer(v), ref->size);
    sr_bufadvance(&b->v, ref->size);
    sr_bufadvance(&b->k, sizeof(sdv));

    /* account for the new entry in the page header */
    page->count++;
    page->size += ref->size + sizeof(sdv);
    if (ref->lsn > page->lsnmax)
        page->lsnmax = ref->lsn;
    if (ref->lsn < page->lsnmin)
        page->lsnmin = ref->lsn;
    if (ref->flags & SVDUP) {
        page->countdup++;
        if (ref->lsn < page->lsnmindup)
            page->lsnmindup = ref->lsn;
    }
    return 0;
}
/*
 * Remove the pointer `del` from the vector `sv` (the song itself is not
 * freed). The whole operation runs with nr_lock held.
 *
 * Returns the index the song occupied, or -1 when it is not in the vector.
 */
int songvec_delete(struct songvec *sv, const struct song *del)
{
	size_t pos = 0;

	g_mutex_lock(nr_lock);

	/* locate the first slot holding `del` */
	while (pos < sv->nr && sv->base[pos] != del)
		++pos;

	if (pos == sv->nr) {
		g_mutex_unlock(nr_lock);
		return -1; /* not found */
	}

	/* we _don't_ call song_free() here */
	--sv->nr;
	if (sv->nr == 0) {
		/* last element gone: release the array entirely */
		g_free(sv->base);
		sv->base = NULL;
	} else {
		/* close the gap, then shrink the allocation to the new size */
		memmove(&sv->base[pos], &sv->base[pos + 1],
			(sv->nr - pos) * sizeof(struct song *));
		sv->base = g_realloc(sv->base, sv_size(sv));
	}

	g_mutex_unlock(nr_lock);
	return pos;
}
/*
 * Single-statement set: advance the sequence with SR_TSNNEXT and probe
 * the index for the key of `v`.
 *
 * Returns SXLOCK when sx_match() returns 0 and yields a node for the key,
 * SXCOMMIT otherwise.
 */
sxstate sx_setstmt(sxmanager *m, sxindex *index, sv *v)
{
    sr_seq(m->seq, SR_TSNNEXT);

    ssrbnode *hit = NULL;
    int rc = sx_match(&index->i, index->scheme,
                      sv_pointer(v), sv_size(v), &hit);

    return (rc == 0 && hit) ? SXLOCK : SXCOMMIT;
}
/*
 * Append the song pointer `add` to the end of the vector `sv`, growing
 * the backing array by one element. Runs with nr_lock held.
 */
void songvec_add(struct songvec *sv, struct song *add)
{
	size_t slot;

	g_mutex_lock(nr_lock);

	/* remember the index of the new last element before growing */
	slot = sv->nr;
	sv->nr++;
	sv->base = g_realloc(sv->base, sv_size(sv));
	sv->base[slot] = add;

	g_mutex_unlock(nr_lock);
}
/*
 * Copy the nth substring of `src` into `*s`.
 *
 * `offs` is read as a flat vector of pairs: element 2*nth is the offset
 * and element 2*nth + 1 is the size of the nth substring.
 *
 * Returns ss_void when `s` is NULL; the result of ss_clear(s) when `src`
 * or `offs` is NULL or `nth` is out of range; otherwise the result of
 * ss_cpy_substr().
 */
ss_t *ss_cpy_sub(ss_t **s, const ss_t *src, const sv_t *offs, const size_t nth)
{
    ASSERT_RETURN_IF(!s, ss_void);
    ASSERT_RETURN_IF((!src || !offs), ss_clear(s)); /* BEHAVIOR: empty */

    /* two vector elements describe one substring */
    const size_t pairs = sv_size(offs) / 2;
    ASSERT_RETURN_IF(nth >= pairs, ss_clear(s)); /* BEHAVIOR: empty */

    const size_t slot = nth * 2;
    const size_t sub_off = (size_t)sv_u_at(offs, slot);
    const size_t sub_size = (size_t)sv_u_at(offs, slot + 1);

    return ss_cpy_substr(s, src, sub_off, sub_size);
}
/*
 * Check one branch for the key of `v`.
 *
 * Opens an sd_indexiter on the branch index with SS_GTE and the key of
 * `v`. Returns 0 when the iterator yields no page; otherwise returns
 * non-zero iff the page's lsnmax is >= sv_lsn(v).
 */
static int si_readcommited_branch(sr *r, sibranch *b, sv *v)
{
    ssiter it;
    ss_iterinit(sd_indexiter, &it);
    ss_iteropen(sd_indexiter, &it, r, &b->index, SS_GTE,
                sv_pointer(v), sv_size(v));

    sdindexpage *hit = ss_iterof(sd_indexiter, &it);
    if (hit != NULL)
        return hit->lsnmax >= sv_lsn(v);
    return 0;
}
/*
 * Write the object `o` into transaction `t` with the given version flags.
 *
 * The object `o` is destroyed on every path that reaches so_destroy()
 * below (success, sx_set failure, and all validation errors).
 *
 * Returns 0 on success and -1 on any failure.
 */
static inline int
se_txwrite(setx *t, sev *o, uint8_t flags)
{
    se *e = se_of(&t->o);
    sedb *db = se_cast(o->o.parent, sedb*, SEDB);
    svv *v;
    sv vp;
    int rc;
    int size;

    /* a prepared transaction accepts no more writes */
    if (ssunlikely(t->t.state == SXPREPARE)) {
        sr_error(&e->error, "%s", "transaction is in 'prepare' state (read-only)");
        goto error;
    }

    /* validate database status */
    int status = se_status(&db->status);
    if (status == SE_SHUTDOWN) {
        if (ssunlikely(! se_dbvisible(db, t->t.id))) {
            sr_error(&e->error, "%s", "database is invisible for the transaction");
            goto error;
        }
    } else if (status != SE_RECOVER && status != SE_ONLINE) {
        goto error;
    }

    /* an update without an update function in the scheme is
     * written with flags cleared */
    if (flags == SVUPDATE && !sf_updatehas(&db->scheme.fmt_update))
        flags = 0;

    /* build the version from the object */
    rc = se_dbv(db, o, 0, &v);
    if (ssunlikely(rc == -1))
        goto error;
    v->flags = flags;
    v->log = o->log;
    sv_init(&vp, &sv_vif, v, NULL);
    so_destroy(&o->o);

    /* charge the quota for the new version */
    size = sizeof(svv) + sv_size(&vp);
    ss_quota(&e->quota, SS_QADD, size);

    /* concurrent index only */
    rc = sx_set(&t->t, &db->coindex, v);
    if (ssunlikely(rc == -1)) {
        /* give the charged quota back */
        ss_quota(&e->quota, SS_QREMOVE, size);
        return -1;
    }
    return 0;

error:
    so_destroy(&o->o);
    return -1;
}
/*
 * Append `v` to b->v in raw form: the LEB128-encoded size, then the
 * LEB128-encoded lsn, then the object bytes.
 *
 * Returns 0 on success or the value of sr_oom() when the buffer could
 * not be grown.
 */
static inline int
sd_buildadd_raw(sdbuild *b, sr *r, sv *v)
{
    uint64_t lsn = sv_lsn(v);
    uint32_t size = sv_size(v);

    /* bytes needed by the two varint fields that precede the payload */
    uint32_t prefix_len = ss_leb128size(size) + ss_leb128size(lsn);

    if (ssunlikely(ss_bufensure(&b->v, r->a, prefix_len + size) == -1))
        return sr_oom(r->e);

    /* each write returns the amount to advance the buffer by */
    ss_bufadvance(&b->v, ss_leb128write(b->v.p, size));
    ss_bufadvance(&b->v, ss_leb128write(b->v.p, lsn));

    memcpy(b->v.p, sv_pointer(v), size);
    ss_bufadvance(&b->v, size);
    return 0;
}
/*
 * Execute a read request against the database index.
 *
 * Takes key, prefix, order and option flags from r->arg, runs si_read()
 * between si_begin()/si_commit(), and stores the outcome in r->rc,
 * r->read_disk, r->read_cache and r->v.
 *
 * Returns r->rc (the value returned by si_read()).
 */
int se_reqread(sereq *r)
{
    sereqarg *arg = &r->arg;
    sedb *db = (sedb*)r->db;

    /* search key: absent unless the request carries a version */
    uint32_t keysize = 0;
    void *key = NULL;
    if (sslikely(arg->v.v)) {
        keysize = sv_size(&arg->v);
        key = sv_pointer(&arg->v);
    }

    /* prefix: key part 0 of the prefix version, when one is given */
    char *prefix = NULL;
    uint32_t prefixsize = 0;
    if (arg->vprefix.v) {
        void *vptr = sv_vpointer(arg->vprefix.v);
        prefix = sf_key(vptr, 0);
        prefixsize = sf_keysize(vptr, 0);
    }

    /* take the read lsn from the sequence if asked to */
    if (sslikely(arg->vlsn_generate))
        arg->vlsn = sr_seq(db->r.seq, SR_LSN);

    sitx x;
    si_begin(&x, &db->index, 1);

    siread q;
    si_readopen(&q, &x, arg->cache, arg->order, arg->vlsn,
                prefix, prefixsize, key, keysize);

    /* optional read modes */
    if (arg->update)
        si_readupdate(&q, &arg->vup, arg->update_eq);
    if (arg->cache_only)
        si_readcache_only(&q);
    if (arg->has)
        si_readhas(&q);

    r->rc = si_read(&q);

    /* publish statistics and the result before closing the read */
    r->read_disk = q.read_disk;
    r->read_cache = q.read_cache;
    r->v = q.result.v;

    si_readclose(&q);
    si_commit(&x);
    return r->rc;
}
/*
 * Unit test: build an svv from a single u32 key plus a u32 value in the
 * SR_FKV format and verify that the sv accessors (flags, lsn, pointer,
 * size) and the sr_format* helpers report the data that was stored.
 *
 * `cx` is not referenced in the body.
 */
static void svv_kv(stc *cx) {
    /* allocator */
    sra a;
    sr_aopen(&a, &sr_stda);
    /* key scheme with one part named "key" of type "u32" */
    srkey cmp;
    sr_keyinit(&cmp);
    srkeypart *part = sr_keyadd(&cmp, &a);
    t( sr_keypart_setname(part, &a, "key") == 0 );
    t( sr_keypart_set(part, &a, "u32") == 0 );
    /* runtime configured for the key-value format */
    sr r;
    sr_init(&r, NULL, &a, NULL, SR_FKV, &cmp, NULL, NULL, NULL);
    uint32_t key = 123;
    uint32_t value = 321;
    /* describe the single key part that goes into the version */
    srformatv pv;
    pv.key = (char*)&key;
    pv.r.size = sizeof(key);
    pv.r.offset = 0;
    svv *vv = sv_vbuild(&r, &pv, 1, (char*)&value, sizeof(value));
    t( vv != NULL );
    vv->flags = SVSET;
    vv->lsn = 10;
    /* wrap the version in the generic sv interface */
    sv v;
    sv_init(&v, &sv_vif, vv, NULL);
    t( sv_flags(&v) == SVSET );
    t( sv_lsn(&v) == 10 );
    /* the lsn must be writable through the interface as well */
    sv_lsnset(&v, 8);
    t( sv_lsn(&v) == 8 );
    /* key and value must read back from the formatted record */
    t( *(uint32_t*)sr_formatkey(sv_pointer(&v), 0) == key );
    t( sr_formatkey_size(sv_pointer(&v), 0) == sizeof(key) );
    t( *(uint32_t*)sr_formatvalue(SR_FKV, &cmp, sv_pointer(&v)) == value );
    t( sr_formatvalue_size(SR_FKV, &cmp, sv_pointer(&v), sv_size(&v) ) == sizeof(value) );
    /* release the version and the key scheme */
    sv_vfree(&a, vv);
    sr_keyfree(&cmp, &a);
}
/*
 * Probe the node that covers the key of `v`: each entry of the node's
 * branch list first, then the node's own `self` branch.
 *
 * Returns 1 as soon as si_readcommited_branch() reports non-zero for a
 * branch in the list; otherwise returns the result of the check on
 * node->self. The node lookup is asserted to succeed.
 */
int si_readcommited(si *index, sr *r, sv *v)
{
    ssiter it;
    ss_iterinit(si_iter, &it);
    ss_iteropen(si_iter, &it, r, index, SS_GTE, sv_pointer(v), sv_size(v));

    sinode *node = ss_iterof(si_iter, &it);
    assert(node != NULL);

    for (sibranch *b = node->branch; b; b = b->next) {
        if (si_readcommited_branch(r, b, v))
            return 1;
    }
    return si_readcommited_branch(r, &node->self, v);
}
/*
 * Append one version `v` to the page that is currently being built.
 *
 * An sdv metadata record is written at the tail of b->m, the object is
 * stored by the format-specific helper selected by r->fmt_storage, and
 * the page header and b->vmax are updated. `flags` is OR-ed into the
 * version flags.
 *
 * Returns 0 on success, the value of sr_oom() when the metadata buffer
 * could not be grown, or -1 when the storage helper failed.
 */
int sd_buildadd(sdbuild *b, sr *r, sv *v, uint32_t flags)
{
    /* prepare object metadata */
    int rc = ss_bufensure(&b->m, r->a, sizeof(sdv));
    if (ssunlikely(rc == -1))
        return sr_oom(r->e);

    sdpageheader *page = sd_buildheader(b);
    sdv *ref = (sdv*)b->m.p;
    ref->flags = sv_flags(v) | flags;
    /* offset of the object relative to the start of this page's data */
    ref->offset = ss_bufused(&b->v) - sd_buildref(b)->v;
    ss_bufadvance(&b->m, sizeof(sdv));

    /* copy object using the configured storage format */
    switch (r->fmt_storage) {
    case SF_SKEYVALUE:
        rc = sd_buildadd_keyvalue(b, r, v);
        break;
    case SF_SRAW:
        rc = sd_buildadd_raw(b, r, v);
        break;
    }
    if (ssunlikely(rc == -1))
        return -1;

    /* update page header */
    page->count++;

    /* track the largest per-object footprint seen so far */
    uint32_t footprint = sizeof(sdv) + sv_size(v) +
                         sizeof(sfref) * r->scheme->count;
    if (footprint > b->vmax)
        b->vmax = footprint;

    uint64_t lsn = sv_lsn(v);
    if (lsn > page->lsnmax)
        page->lsnmax = lsn;
    if (lsn < page->lsnmin)
        page->lsnmin = lsn;

    if (ref->flags & SVDUP) {
        page->countdup++;
        if (lsn < page->lsnmindup)
            page->lsnmindup = lsn;
    }
    return 0;
}
/*
 * Look up `key` in the transactional index on behalf of transaction `x`.
 *
 * When the transaction has a version for the key:
 *    0  the version has the SVGET flag set
 *    2  the version has the SVDELETE flag set
 *    1  the version was duplicated with sv_vdup() into `result`
 *   otherwise the value of sr_oom() when the duplication failed
 *
 * When it has none, key->v is added to the transaction through sx_set()
 * and then referenced with sv_vref():
 *    0  the key was added
 *   -1  sx_set() failed
 */
int sx_get(sx *x, sxindex *index, sv *key, sv *result)
{
    sxmanager *m = x->manager;
    ssrbnode *n = NULL;

    int rc = sx_match(&index->i, index->r->scheme,
                      sv_pointer(key), sv_size(key), &n);

    /* version of this key owned by the transaction, if any */
    sxv *own = NULL;
    if (rc == 0 && n)
        own = sx_vmatch(sscast(n, sxv, node), x->id);

    if (own == NULL) {
        /* track a start of the latest read sequence in the
         * transactional log */
        if (x->log_read == -1)
            x->log_read = sv_logcount(&x->log);
        rc = sx_set(x, index, key->v);
        if (ssunlikely(rc == -1))
            return -1;
        sv_vref((svv*)key->v);
        return 0;
    }

    if (ssunlikely((own->v->flags & SVGET) > 0))
        return 0;
    if (ssunlikely((own->v->flags & SVDELETE) > 0))
        return 2;

    /* hand a private copy back to the caller */
    sv src;
    sv_init(&src, &sv_vif, own->v, NULL);
    svv *copy = sv_vdup(m->r, &src);
    if (ssunlikely(copy == NULL))
        return sr_oom(m->r->e);

    sv_init(result, &sv_vif, copy, NULL);
    return 1;
}
/*
 * Add branch `b` of node `n` to the merge `m` as a data source, using
 * the per-branch cache entry that si_cachefollow() returns next.
 *
 * Nothing is reported to the caller: if the cache entry was already
 * opened, if opening the iterator fails, or if the opened iterator has
 * no data, the function returns without adding a source.
 */
static inline void si_qrangebranch(siquery *q, sinode *n, sibranch *b, svmerge *m) {
    sicachebranch *cb = si_cachefollow(q->cache);
    /* the cache is expected to be walked in the same order as the branches */
    assert(cb->branch == b);
    /* iterate cache */
    if (ss_iterhas(si_read, &cb->i)) {
        /* the cached iterator still has data: reuse it as a source */
        svmergesrc *s = sv_mergeadd(m, &cb->i);
        q->index->read_cache++;
        s->ptr = cb;
        return;
    }
    /* already opened before and has no data now: contribute nothing */
    if (cb->open) {
        return;
    }
    cb->open = 1;
    /* first use of this cache entry: open a read iterator over the branch */
    sireadarg arg = {
        .scheme = q->index->scheme,
        .index = q->index,
        .n = n,
        .b = b,
        .buf = &cb->buf_a,
        .buf_xf = &cb->buf_b,
        .buf_read = &q->index->readbuf,
        .index_iter = &cb->index_iter,
        .page_iter = &cb->page_iter,
        .vlsn = q->vlsn,
        .has = 0,
        .mmap_copy = 1,
        .o = q->order,
        .r = q->r
    };
    ss_iterinit(si_read, &cb->i);
    int rc = ss_iteropen(si_read, &cb->i, &arg, q->key, q->keysize);
    /* NOTE(review): an open failure is dropped here, the caller cannot see it */
    if (ssunlikely(rc == -1)) return;
    if (ssunlikely(! ss_iterhas(si_read, &cb->i))) return;
    svmergesrc *s = sv_mergeadd(m, &cb->i);
    s->ptr = cb;
}

/*
 * Range read: find the first visible version at or after q->key in
 * q->order, walking index nodes until one yields a version.
 *
 * For each node the sources are merged in this order: the optional
 * external update version, the node's in-memory indexes, and one source
 * per branch.
 *
 * Returns:
 *    0  no node yields a version, or the version found fails the
 *       update_eq key comparison or the prefix check
 *    1  a version was found and duplicated with si_querydup()
 *   -1  sv_mergeprepare(), si_cachevalidate() or si_querydup() failed
 */
static inline int si_qrange(siquery *q) {
    ssiter i;
    ss_iterinit(si_iter, &i);
    ss_iteropen(si_iter, &i, q->r, q->index, q->order, q->key, q->keysize);
    sinode *node;
next_node:
    /* re-entered through goto after a node produced nothing */
    node = ss_iterof(si_iter, &i);
    if (ssunlikely(node == NULL)) return 0;
    /* prepare sources */
    svmerge *m = &q->merge;
    /* one source per branch, up to two in-memory indexes, one external
     * update source: the maximum number added below */
    int count = node->branch_count + 2 + 1;
    int rc = sv_mergeprepare(m, q->r, count);
    if (ssunlikely(rc == -1)) {
        sr_errorreset(q->r->e);
        return -1;
    }
    /* external source (update) */
    svmergesrc *s;
    sv upbuf_reserve;
    ssbuf upbuf;
    if (ssunlikely(q->update_v && q->update_v->v)) {
        /* a buffer backed by the local `upbuf_reserve` holds the single
         * pointer q->update_v, iterated with ss_bufiterref */
        ss_bufinit_reserve(&upbuf, &upbuf_reserve, sizeof(upbuf_reserve));
        ss_bufadd(&upbuf, NULL, (void*)&q->update_v, sizeof(sv*));
        s = sv_mergeadd(m, NULL);
        ss_iterinit(ss_bufiterref, &s->src);
        ss_iteropen(ss_bufiterref, &s->src, &upbuf, sizeof(sv*));
    }
    /* in-memory indexes */
    svindex *second;
    svindex *first = si_nodeindex_priority(node, &second);
    if (first->count) {
        s = sv_mergeadd(m, NULL);
        ss_iterinit(sv_indexiter, &s->src);
        ss_iteropen(sv_indexiter, &s->src, q->r, first, q->order, q->key, q->keysize);
    }
    if (ssunlikely(second && second->count)) {
        s = sv_mergeadd(m, NULL);
        ss_iterinit(sv_indexiter, &s->src);
        ss_iteropen(sv_indexiter, &s->src, q->r, second, q->order, q->key, q->keysize);
    }
    /* cache and branches */
    rc = si_cachevalidate(q->cache, node);
    if (ssunlikely(rc == -1)) {
        sr_oom(q->r->e);
        return -1;
    }
    sibranch *b = node->branch;
    while (b) {
        si_qrangebranch(q, node, b, m);
        b = b->next;
    }
    /* merge and filter data stream */
    ssiter j;
    ss_iterinit(sv_mergeiter, &j);
    ss_iteropen(sv_mergeiter, &j, q->r, m, q->order);
    ssiter k;
    ss_iterinit(sv_readiter, &k);
    ss_iteropen(sv_readiter, &k, q->r, &j, &q->index->u, q->vlsn, 0);
    sv *v = ss_iterof(sv_readiter, &k);
    if (ssunlikely(v == NULL)) {
        /* nothing in this node: reset the merge and try the next node */
        sv_mergereset(&q->merge);
        ss_iternext(si_iter, &i);
        goto next_node;
    }
    rc = 1;
    /* convert update search to SS_EQ */
    if (q->update_eq) {
        rc = sr_compare(q->r->scheme, sv_pointer(v), sv_size(v), q->key, q->keysize);
        /* rc becomes 1 only on an exact key match */
        rc = rc == 0;
    }
    /* do prefix search */
    if (q->prefix && rc) {
        rc = sr_compareprefix(q->r->scheme, q->prefix, q->prefixsize, sv_pointer(v), sv_size(v));
    }
    if (sslikely(rc == 1)) {
        if (ssunlikely(si_querydup(q, v) == -1)) return -1;
    }
    /* skip a possible duplicates from data sources */
    ss_iternext(sv_readiter, &k);
    return rc;
}
/*
 * Add branch `b` of node `n` to the merge `m` as a data source, using
 * the per-branch cache entry that si_cachefollow() returns next.
 *
 * Returns:
 *    1  a source was added, or the cache entry was already opened
 *       (in which case nothing is added)
 *    2  the entry is not opened yet and the read is cache-only
 *    0  the iterator was opened but has no data
 *   -1  opening the iterator failed
 */
static inline int si_rangebranch(siread *q, sinode *n, sibranch *b, svmerge *m) {
    sicachebranch *c = si_cachefollow(q->cache);
    /* the cache is expected to be walked in the same order as the branches */
    assert(c->branch == b);
    /* iterate cache */
    if (ss_iterhas(sd_read, &c->i)) {
        /* the cached iterator still has data: reuse it as a source */
        svmergesrc *s = sv_mergeadd(m, &c->i);
        si_readstat(q, 1, n, 1);
        s->ptr = c;
        return 1;
    }
    /* already opened before and has no data now: contribute nothing */
    if (c->open) {
        return 1;
    }
    /* a cache-only read stops here instead of opening the branch */
    if (q->cache_only) {
        return 2;
    }
    c->open = 1;
    /* choose compression type */
    int compression;
    ssfilterif *compression_if;
    if (! si_branchis_root(b)) {
        compression = q->index->scheme->compression_branch;
        compression_if = q->index->scheme->compression_branch_if;
    } else {
        compression = q->index->scheme->compression;
        compression_if = q->index->scheme->compression_if;
    }
    /* first use of this cache entry: open a read iterator over the branch */
    sdreadarg arg = {
        .index = &b->index,
        .buf = &c->buf_a,
        .buf_xf = &c->buf_b,
        .buf_read = &q->index->readbuf,
        .index_iter = &c->index_iter,
        .page_iter = &c->page_iter,
        .use_memory = n->in_memory,
        .use_mmap = q->index->scheme->mmap,
        .use_mmap_copy = 1,
        .use_compression = compression,
        .compression_if = compression_if,
        .has = 0,
        .has_vlsn = 0,
        .o = q->order,
        .memory = &b->copy,
        .mmap = &n->map,
        .file = &n->file,
        .r = q->r
    };
    ss_iterinit(sd_read, &c->i);
    int rc = ss_iteropen(sd_read, &c->i, &arg, q->key, q->keysize);
    /* statistics are recorded before rc is checked, so also on failure */
    int reads = sd_read_stat(&c->i);
    si_readstat(q, 0, n, reads);
    if (ssunlikely(rc == -1)) return -1;
    if (ssunlikely(! ss_iterhas(sd_read, &c->i))) return 0;
    svmergesrc *s = sv_mergeadd(m, &c->i);
    s->ptr = c;
    return 1;
}

/*
 * Range read: find the first visible version at or after q->key in
 * q->order, walking index nodes until one yields a version. Must not be
 * used in `has` mode (asserted).
 *
 * For each node the sources are merged in this order: the optional
 * external upsert version, the node's in-memory indexes, and one source
 * per branch.
 *
 * Returns:
 *    0  no node yields a version, or the version found fails the
 *       upsert_eq key comparison or the prefix check
 *    1  a version was found and duplicated with si_readdup()
 *    2  si_rangebranch() returned 2 (cache-only read hit an unopened
 *       branch)
 *   -1  sv_mergeprepare(), si_cachevalidate(), si_rangebranch() or
 *       si_readdup() failed
 */
static inline int si_range(siread *q) {
    assert(q->has == 0);
    ssiter i;
    ss_iterinit(si_iter, &i);
    ss_iteropen(si_iter, &i, q->r, q->index, q->order, q->key, q->keysize);
    sinode *node;
next_node:
    /* re-entered through goto after a node produced nothing */
    node = ss_iterof(si_iter, &i);
    if (ssunlikely(node == NULL)) return 0;
    si_txtrack(q->x, node);
    /* prepare sources */
    svmerge *m = &q->merge;
    /* one source per branch, up to two in-memory indexes, one external
     * upsert source: the maximum number added below */
    int count = node->branch_count + 2 + 1;
    int rc = sv_mergeprepare(m, q->r, count);
    if (ssunlikely(rc == -1)) {
        sr_errorreset(q->r->e);
        return -1;
    }
    /* external source (upsert) */
    svmergesrc *s;
    sv upbuf_reserve;
    ssbuf upbuf;
    if (ssunlikely(q->upsert_v && q->upsert_v->v)) {
        /* a buffer backed by the local `upbuf_reserve` holds the single
         * pointer q->upsert_v, iterated with ss_bufiterref */
        ss_bufinit_reserve(&upbuf, &upbuf_reserve, sizeof(upbuf_reserve));
        ss_bufadd(&upbuf, NULL, (void*)&q->upsert_v, sizeof(sv*));
        s = sv_mergeadd(m, NULL);
        ss_iterinit(ss_bufiterref, &s->src);
        ss_iteropen(ss_bufiterref, &s->src, &upbuf, sizeof(sv*));
    }
    /* in-memory indexes */
    svindex *second;
    svindex *first = si_nodeindex_priority(node, &second);
    if (first->count) {
        s = sv_mergeadd(m, NULL);
        ss_iterinit(sv_indexiter, &s->src);
        ss_iteropen(sv_indexiter, &s->src, q->r, first, q->order, q->key, q->keysize);
    }
    if (ssunlikely(second && second->count)) {
        s = sv_mergeadd(m, NULL);
        ss_iterinit(sv_indexiter, &s->src);
        ss_iteropen(sv_indexiter, &s->src, q->r, second, q->order, q->key, q->keysize);
    }
    /* cache and branches */
    rc = si_cachevalidate(q->cache, node);
    if (ssunlikely(rc == -1)) {
        sr_oom(q->r->e);
        return -1;
    }
    sibranch *b = node->branch;
    while (b) {
        rc = si_rangebranch(q, node, b, m);
        /* propagate an error (-1) or a cache-only miss (2) unchanged */
        if (ssunlikely(rc == -1 || rc == 2)) return rc;
        b = b->next;
    }
    /* merge and filter data stream */
    ssiter j;
    ss_iterinit(sv_mergeiter, &j);
    ss_iteropen(sv_mergeiter, &j, q->r, m, q->order);
    ssiter k;
    ss_iterinit(sv_readiter, &k);
    ss_iteropen(sv_readiter, &k, q->r, &j, &q->index->u, q->vlsn, 0);
    sv *v = ss_iterof(sv_readiter, &k);
    if (ssunlikely(v == NULL)) {
        /* nothing in this node: reset the merge and try the next node */
        sv_mergereset(&q->merge);
        ss_iternext(si_iter, &i);
        goto next_node;
    }
    rc = 1;
    /* convert upsert search to SS_EQ */
    if (q->upsert_eq) {
        rc = sr_compare(q->r->scheme, sv_pointer(v), sv_size(v), q->key, q->keysize);
        /* rc becomes 1 only on an exact key match */
        rc = rc == 0;
    }
    /* do prefix search */
    if (q->prefix && rc) {
        rc = sr_compareprefix(q->r->scheme, q->prefix, q->prefixsize, sv_pointer(v), sv_size(v));
    }
    if (sslikely(rc == 1)) {
        if (ssunlikely(si_readdup(q, v) == -1)) return -1;
    }
    /* skip a possible duplicates from data sources */
    ss_iternext(sv_readiter, &k);
    return rc;
}