/*
 * Start transaction `x` of the given `type` on manager `m`.
 *
 * The transaction is promoted to SXREADY, receives the manager's current
 * csn and a fresh id (SR_TSNNEXT), and gets its vlsn either from the
 * sequence (when `vlsn` is 0) or from the caller-supplied value. All
 * sequence accesses happen between sr_seqlock()/sr_sequnlock().
 *
 * The transaction node is then inserted into the manager index under
 * m->lock, and the read-only or read-write counter is bumped.
 *
 * Always returns SXREADY.
 */
sxstate sx_begin(sxmanager *m, sx *x, sxtype type, uint64_t vlsn)
{
	sx_promote(x, SXREADY);
	x->log_read = -1;
	x->type = type;

	/* allocate identifiers under the sequence lock */
	sr_seqlock(m->r->seq);
	x->csn = m->csn;
	x->id = sr_seqdo(m->r->seq, SR_TSNNEXT);
	if (sslikely(vlsn == 0)) {
		x->vlsn = sr_seqdo(m->r->seq, SR_LSN);
	} else {
		x->vlsn = vlsn;
	}
	sr_sequnlock(m->r->seq);

	sx_init(m, x);

	/* register the transaction in the index */
	ss_spinlock(&m->lock);
	ssrbnode *parent = NULL;
	int rel = sx_matchtx(&m->i, NULL, (char*)&x->id, sizeof(x->id), &parent);
	if (rel != 0 || parent == NULL) {
		ss_rbset(&m->i, parent, rel, &x->node);
	} else {
		/* an exact match means the id is already registered */
		assert(0);
	}
	if (type != SXRO) {
		m->count_rw++;
	} else {
		m->count_rd++;
	}
	ss_spinunlock(&m->lock);
	return SXREADY;
}
/*
 * Unregister transaction `t` from its manager: remove its node from the
 * manager index and decrement the transaction counter, both under the
 * manager lock.
 */
static inline void sx_end(sx *t)
{
	sxmanager *mgr = t->manager;

	ss_spinlock(&mgr->lock);
	ss_rbremove(&mgr->i, &t->node);
	mgr->count -= 1;
	ss_spinunlock(&mgr->lock);
}
/*
 * Account for one call on the test vfs `f` and report whether that call
 * should fail.
 *
 * Returns 1 when the call counter (before being incremented) has reached
 * `fail_from`, 0 otherwise. The counter is read and updated under the
 * vfs lock.
 */
static inline int ss_testvfs_call(ssvfs *f)
{
	sstestvfs *state = (sstestvfs*)f->priv;
	int fail = 0;

	ss_spinlock(&state->lock);
	if (state->n >= state->fail_from) {
		fail = 1;
	}
	state->n++;
	ss_spinunlock(&state->lock);

	return fail;
}
/*
 * Return the id of the transaction at the maximum position of the
 * manager index, or 0 when sx_count() reports no transactions.
 * The lookup is done under the manager lock.
 */
uint64_t sx_max(sxmanager *m)
{
	uint64_t result = 0;

	ss_spinlock(&m->lock);
	if (sx_count(m) > 0) {
		ssrbnode *last = ss_rbmax(&m->i);
		sx *tx = sscast(last, sx, node);
		result = tx->id;
	}
	ss_spinunlock(&m->lock);

	return result;
}
/*
 * Unregister transaction `x` from its manager: remove its node from the
 * manager index and decrement the read-only or read-write counter
 * according to the transaction type. Everything is done under the
 * manager lock.
 */
static inline void sx_end(sx *x)
{
	sxmanager *mgr = x->manager;

	ss_spinlock(&mgr->lock);
	ss_rbremove(&mgr->i, &x->node);
	if (x->type != SXRO) {
		mgr->count_rw--;
	} else {
		mgr->count_rd--;
	}
	ss_spinunlock(&mgr->lock);
}
/*
 * Return the id of the transaction at the minimum position of the
 * manager index, or 0 when sx_count() reports no transactions.
 * The lookup is done under the manager lock.
 *
 * NOTE(review): the result is carried in 32 bits, while sx_max() returns
 * the same `id` field as uint64_t. If sx.id is wider than 32 bits the
 * value is truncated here - confirm the field width and the prototype.
 */
uint32_t sx_min(sxmanager *m)
{
	uint32_t result = 0;

	ss_spinlock(&m->lock);
	if (sx_count(m) > 0) {
		ssrbnode *first = ss_rbmin(&m->i);
		sx *tx = sscast(first, sx, node);
		result = tx->id;
	}
	ss_spinunlock(&m->lock);

	return result;
}
/*
 * Return slab `ptr` to the allocator's free list.
 *
 * The first pointer-sized bytes of the slab are overwritten with the
 * previous free-list head, the slab becomes the new head, and the free
 * counter is incremented. `ptr` must not be NULL.
 */
static inline sshot void ss_slabafree(ssa *a, void *ptr)
{
	ssslaba *pool = (ssslaba*)a->priv;
	assert(ptr != NULL);

	ss_spinlock(&pool->lock);
	/* link the released slab in front of the current free list */
	char **link = (char**)ptr;
	*link = pool->pool_free;
	pool->pool_free = ptr;
	pool->pool_free_count++;
	ss_spinunlock(&pool->lock);
}
/*
 * Return the vlsn of the transaction at the minimum position of the
 * manager index. When sx_count() reports no transactions, return the
 * current SR_LSN value of the sequence instead.
 * The whole lookup is done under the manager lock.
 */
uint64_t sx_vlsn(sxmanager *m)
{
	uint64_t result;

	ss_spinlock(&m->lock);
	if (sx_count(m) > 0) {
		ssrbnode *first = ss_rbmin(&m->i);
		sx *tx = sscast(first, sx, node);
		result = tx->vlsn;
	} else {
		/* no transactions registered: fall back to the sequence */
		result = sr_seq(m->r->seq, SR_LSN);
	}
	ss_spinunlock(&m->lock);

	return result;
}
/*
 * Start transaction `t` on manager `m`.
 *
 * The transaction state is set to SXREADY and its `complete` flag is
 * cleared. It receives a fresh id (SR_TSNNEXT) and gets its vlsn either
 * from the sequence (when `vlsn` is 0) or from the caller-supplied
 * value; both are assigned between sr_seqlock()/sr_sequnlock().
 *
 * The transaction node is then inserted into the manager index under
 * m->lock and the transaction counter is incremented.
 *
 * Always returns SXREADY.
 */
sxstate sx_begin(sxmanager *m, sx *t, uint64_t vlsn)
{
	t->complete = 0;
	t->s = SXREADY;

	/* allocate identifiers under the sequence lock */
	sr_seqlock(m->seq);
	t->id = sr_seqdo(m->seq, SR_TSNNEXT);
	if (sslikely(vlsn == 0)) {
		t->vlsn = sr_seqdo(m->seq, SR_LSN);
	} else {
		t->vlsn = vlsn;
	}
	sr_sequnlock(m->seq);

	sx_init(m, t);

	/* register the transaction in the index */
	ss_spinlock(&m->lock);
	ssrbnode *parent = NULL;
	int rel = sx_matchtx(&m->i, NULL, (char*)&t->id, sizeof(t->id), &parent);
	if (rel != 0 || parent == NULL) {
		ss_rbset(&m->i, parent, rel, &t->node);
	} else {
		/* an exact match means the id is already registered */
		assert(0);
	}
	m->count++;
	ss_spinunlock(&m->lock);
	return SXREADY;
}
/*
 * Hand out one slab from the allocator.
 *
 * `size` must equal the allocator's slab size (checked by assert only).
 * A slab is taken from the free list when one is available; otherwise
 * the next untouched slab of the pool is used. Returns NULL when the
 * free list is empty and pool_next has reached pool_end.
 */
static inline sshot void* ss_slabamalloc(ssa *a, int size ssunused)
{
	ssslaba *pool = (ssslaba*)a->priv;
	assert(size == (int)s->slab_size);
	char *slab = NULL;

	ss_spinlock(&pool->lock);
	if (sslikely(pool->pool_free_count)) {
		/* pop the head of the free list */
		slab = pool->pool_free;
		pool->pool_free = *(char**)slab;
		pool->pool_free_count--;
		if (ssunlikely(pool->pool_free_count == 0)) {
			pool->pool_free = NULL;
		}
	} else if (sslikely(pool->pool_next != pool->pool_end)) {
		/* carve a fresh slab from the unused part of the pool */
		slab = pool->pool_next;
		pool->pool_next += pool->slab_size;
	}
	ss_spinunlock(&pool->lock);

	return slab;
}
/*
 * Pick the next piece of background work for worker `w` and describe it
 * in `task`.
 *
 * The task flags (checkpoint_complete, backup_complete, rotate, req, gc)
 * and task->db are reset first; the scheduler mutex s->lock is then held
 * for the whole decision and released on every return path.
 *
 * Work is considered in this order:
 *   1. asynchronous request dispatch
 *   2. log rotation flag
 *   3. checkpoint
 *   4. zone policy (mode 2 restarts a checkpoint, mode 3 continues)
 *   5. database shutdown / drop
 *   6. backup
 *   7. garbage collection
 *   8. index aging
 *   9. branching
 *  10. compaction
 *
 * Returns 1 when a plan was selected; task->plan is filled in and
 * task->db holds a referenced database (se_dbref(db, 1)).
 * Returns 0 when no plan was selected; task->req, task->rotate and the
 * *_complete flags may still have been set for the caller to act on.
 */
static int
se_schedule(sescheduler *s, setask *task, seworker *w)
{
	ss_trace(&w->trace, "%s", "schedule");
	si_planinit(&task->plan);

	uint64_t now = ss_utime();
	se *e = (se*)s->env;
	sedb *db;
	srzone *zone = se_zoneof(e);
	assert(zone != NULL);

	task->checkpoint_complete = 0;
	task->backup_complete = 0;
	task->rotate = 0;
	task->req = 0;
	task->gc = 0;
	task->db = NULL;

	ss_mutexlock(&s->lock);

	/* asynchronous reqs dispatcher */
	if (s->req == 0) {
		switch (zone->async) {
		case 2:
			if (se_reqqueue(e) == 0)
				break;
			/* fallthrough: requests are queued, dispatch as in mode 1 */
		case 1:
			s->req = 1;
			task->req = zone->async;
			ss_mutexunlock(&s->lock);
			return 0;
		}
	}

	/* log gc and rotation */
	if (s->rotate == 0) {
		task->rotate = 1;
		s->rotate = 1;
	}

	/* checkpoint */
	int in_progress = 0;
	int rc;
checkpoint:
	if (s->checkpoint) {
		task->plan.plan = SI_CHECKPOINT;
		task->plan.a = s->checkpoint_lsn;
		rc = se_schedule_plan(s, &task->plan, &db);
		switch (rc) {
		case 1:
			s->workers_branch++;
			se_dbref(db, 1);
			task->db = db;
			task->gc = 1;
			ss_mutexunlock(&s->lock);
			return 1;
		case 2: /* work in progress */
			in_progress = 1;
			break;
		case 0: /* complete checkpoint */
			s->checkpoint = 0;
			s->checkpoint_lsn_last = s->checkpoint_lsn;
			s->checkpoint_lsn = 0;
			task->checkpoint_complete = 1;
			break;
		}
	}

	/* apply zone policy */
	switch (zone->mode) {
	case 0:  /* compact_index */
	case 1:  /* compact_index + branch_count prio */
		assert(0);
		break;
	case 2:  /* checkpoint */
	{
		if (in_progress) {
			ss_mutexunlock(&s->lock);
			return 0;
		}
		/* start a new checkpoint at the current lsn and re-run the
		 * checkpoint step above */
		uint64_t lsn = sr_seq(&e->seq, SR_LSN);
		s->checkpoint_lsn = lsn;
		s->checkpoint = 1;
		goto checkpoint;
	}
	default: /* branch + compact */
		assert(zone->mode == 3);
	}

	/* database shutdown-drop */
	if (s->workers_gc_db < zone->gc_db_prio) {
		ss_spinlock(&e->dblock);
		db = NULL;
		if (ssunlikely(e->db_shutdown.n > 0)) {
			db = (sedb*)so_listfirst(&e->db_shutdown);
			if (se_dbgarbage(db)) {
				so_listdel(&e->db_shutdown, &db->o);
			} else {
				db = NULL;
			}
		}
		ss_spinunlock(&e->dblock);
		if (ssunlikely(db)) {
			if (db->dropped)
				task->plan.plan = SI_DROP;
			else
				task->plan.plan = SI_SHUTDOWN;
			s->workers_gc_db++;
			se_dbref(db, 1);
			task->db = db;
			ss_mutexunlock(&s->lock);
			return 1;
		}
	}

	/* backup */
	if (s->backup && (s->workers_backup < zone->backup_prio)) {
		/* backup procedure.
		 *
		 * state 0 (start)
		 * -------
		 *
		 * a. disable log gc
		 * b. mark to start backup (state 1)
		 *
		 * state 1 (background, delayed start)
		 * -------
		 *
		 * a. create backup_path/<bsn.incomplete> directory
		 * b. create database directories
		 * c. create log directory
		 * d. state 2
		 *
		 * state 2 (background, copy)
		 * -------
		 *
		 * a. schedule and execute node backup which bsn < backup_bsn
		 * b. state 3
		 *
		 * state 3 (background, completion)
		 * -------
		 *
		 * a. rotate log file
		 * b. copy log files
		 * c. enable log gc, schedule gc
		 * d. rename <bsn.incomplete> into <bsn>
		 * e. set last backup, set COMPLETE
		 *
		 */
		if (s->backup == 1) {
			/* state 1 */
			rc = se_backupstart(s);
			if (ssunlikely(rc == -1)) {
				se_backuperror(s);
				goto backup_error;
			}
			s->backup = 2;
		}
		/* state 2 */
		task->plan.plan = SI_BACKUP;
		task->plan.a = s->backup_bsn;
		rc = se_schedule_plan(s, &task->plan, &db);
		switch (rc) {
		case 1:
			s->workers_backup++;
			se_dbref(db, 1);
			task->db = db;
			ss_mutexunlock(&s->lock);
			return 1;
		case 2: /* work in progress */
			break;
		case 0:
			/* state 3 */
			rc = se_backupcomplete(s, w);
			if (ssunlikely(rc == -1)) {
				se_backuperror(s);
				goto backup_error;
			}
			s->backup_events++;
			task->gc = 1;
			task->backup_complete = 1;
			break;
		}
backup_error:;
		/* on error the scheduler falls through to the remaining steps */
	}

	/* garbage-collection */
	if (s->gc) {
		if (s->workers_gc < zone->gc_prio) {
			task->plan.plan = SI_GC;
			task->plan.a = sx_vlsn(&e->xm);
			task->plan.b = zone->gc_wm;
			rc = se_schedule_plan(s, &task->plan, &db);
			switch (rc) {
			case 1:
				s->workers_gc++;
				se_dbref(db, 1);
				task->db = db;
				ss_mutexunlock(&s->lock);
				return 1;
			case 2: /* work in progress */
				break;
			case 0: /* gc pass complete */
				s->gc = 0;
				s->gc_last = now;
				break;
			}
		}
	} else {
		if (zone->gc_prio && zone->gc_period) {
			if ( (now - s->gc_last) >= ((uint64_t)zone->gc_period * 1000000) ) {
				s->gc = 1;
			}
		}
	}

	/* index aging */
	if (s->age) {
		if (s->workers_branch < zone->branch_prio) {
			task->plan.plan = SI_AGE;
			/* Widen before scaling, exactly as the period checks in
			 * this function do, so the product cannot wrap if
			 * branch_age is a 32-bit field.
			 * NOTE(review): the factor 1000000 presumably converts
			 * seconds to the unit of ss_utime() - confirm. */
			task->plan.a = (uint64_t)zone->branch_age * 1000000;
			task->plan.b = zone->branch_age_wm;
			rc = se_schedule_plan(s, &task->plan, &db);
			switch (rc) {
			case 1:
				s->workers_branch++;
				se_dbref(db, 1);
				task->db = db;
				ss_mutexunlock(&s->lock);
				return 1;
			case 0:
				s->age = 0;
				s->age_last = now;
				break;
			}
		}
	} else {
		if (zone->branch_prio && zone->branch_age_period) {
			if ( (now - s->age_last) >= ((uint64_t)zone->branch_age_period * 1000000) ) {
				s->age = 1;
			}
		}
	}

	/* branching */
	if (s->workers_branch < zone->branch_prio) {
		/* schedule branch task using following
		 * priority:
		 *
		 * a. peek node with the largest in-memory index
		 *    which is equal or greater than branch
		 *    watermark.
		 *    If nothing is found, stick to b.
		 *
		 * b. peek node with the largest in-memory index,
		 *    which has oldest update time.
		 *
		 * c. if no branch work is needed, schedule a
		 *    compaction job
		 *
		 */
		task->plan.plan = SI_BRANCH;
		task->plan.a = zone->branch_wm;
		rc = se_schedule_plan(s, &task->plan, &db);
		if (rc == 1) {
			s->workers_branch++;
			se_dbref(db, 1);
			task->db = db;
			task->gc = 1;
			ss_mutexunlock(&s->lock);
			return 1;
		}
	}

	/* compaction */
	task->plan.plan = SI_COMPACT;
	task->plan.a = zone->compact_wm;
	task->plan.b = zone->compact_mode;
	rc = se_schedule_plan(s, &task->plan, &db);
	if (rc == 1) {
		se_dbref(db, 1);
		task->db = db;
		ss_mutexunlock(&s->lock);
		return 1;
	}
	ss_mutexunlock(&s->lock);
	return 0;
}