예제 #1
0
/*
 * Write a one-line trace record describing a planner decision.
 *
 * p - plan to describe: p->plan and p->explain select the labels,
 *     p->node (when non-NULL) contributes the node id.
 * t - trace object passed through to ss_trace().
 *
 * Always returns 0.
 */
int si_plannertrace(siplan *p, sstrace *t)
{
	/* Both labels start with a printable fallback. The switches below
	 * have no default label, so a value they do not list would
	 * otherwise reach the "%s" conversion as a NULL pointer, which is
	 * undefined behaviour. */
	const char *plan = "unknown";
	switch (p->plan) {
	case SI_BRANCH: plan = "branch";
		break;
	case SI_AGE: plan = "age";
		break;
	case SI_COMPACT: plan = "compact";
		break;
	case SI_CHECKPOINT: plan = "checkpoint";
		break;
	case SI_GC: plan = "gc";
		break;
	case SI_TEMP: plan = "temperature";
		break;
	case SI_BACKUP:
	case SI_BACKUPEND: plan = "backup";
		break;
	case SI_SHUTDOWN: plan = "database shutdown";
		break;
	case SI_DROP: plan = "database drop";
		break;
	}
	const char *explain = "unknown";
	switch (p->explain) {
	case SI_ENONE:
		explain = "none";
		break;
	case SI_ERETRY:
		explain = "retry expected";
		break;
	case SI_EINDEX_SIZE:
		explain = "index size";
		break;
	case SI_EINDEX_AGE:
		explain = "index age";
		break;
	case SI_EBRANCH_COUNT:
		explain = "branch count";
		break;
	case SI_ETEMP:
		explain = "temperature";
		break;
	}
	if (p->node) {
		/* include the node id when the plan targets a node */
		ss_trace(t, "%s <#%" PRIu32 " explain: %s>",
		         plan,
		         p->node->self.id.id, explain);
	} else {
		ss_trace(t, "%s <explain: %s>", plan, explain);
	}
	return 0;
}
예제 #2
0
/*
 * Last stage of a backup. In order:
 *   1. rotate the log file
 *   2. copy the log files into <backup_path>/<bsn>.incomplete/log
 *   3. enable log gc
 *   4. rename <bsn>.incomplete into <bsn>
 *   5. record the backup as last/complete and reset the backup state
 *
 * Returns 0 on success, -1 when rotation, copy or rename fails.
 */
static inline int
se_backupcomplete(sescheduler *s, seworker *w)
{
	se *env = (se*)s->env;
	char src[1024];
	char dst[1024];

	/* 1. force log rotation */
	ss_trace(&w->trace, "%s", "log rotation for backup");
	if (ssunlikely(sl_poolrotate(&env->lp) == -1))
		return -1;

	/* 2. copy log files */
	ss_trace(&w->trace, "%s", "log files backup");
	snprintf(src, sizeof(src), "%s/%" PRIu32 ".incomplete/log",
	         env->conf.backup_path, s->backup_bsn);
	if (ssunlikely(sl_poolcopy(&env->lp, src, &w->dc.c) == -1)) {
		sr_errorrecover(&env->error);
		return -1;
	}

	/* 3. enable log gc */
	sl_poolgc_enable(&env->lp, 1);

	/* 4. move the backup directory to its final name */
	snprintf(src, sizeof(src), "%s/%" PRIu32 ".incomplete",
	         env->conf.backup_path, s->backup_bsn);
	snprintf(dst, sizeof(dst), "%s/%" PRIu32,
	         env->conf.backup_path, s->backup_bsn);
	if (ssunlikely(rename(src, dst) == -1)) {
		sr_error(&env->error, "backup directory '%s' rename error: %s",
		         src, strerror(errno));
		return -1;
	}

	/* 5. publish the result and reset scheduler backup state */
	s->backup_last = s->backup_bsn;
	s->backup_last_complete = 1;
	s->backup = 0;
	s->backup_bsn = 0;
	return 0;
}
예제 #3
0
/*
 * Last stage of a backup. In order:
 *   1. rotate the log file
 *   2. copy the log files into <backup_path>/<bsn>.incomplete/log
 *   3. rename <bsn>.incomplete into <bsn>
 *   4. enable log gc
 *   5. under s->lock, record the backup as last/complete and reset
 *      the backup state
 *
 * Returns 0 on success, -1 when rotation, copy or rename fails.
 */
int sc_backupend(sc *s, scworker *w)
{
	char src[1024];
	char dst[1024];

	/* 1. force log rotation */
	ss_trace(&w->trace, "%s", "log rotation for backup");
	if (ssunlikely(sl_poolrotate(s->lp) == -1))
		return -1;

	/* 2. copy log files */
	ss_trace(&w->trace, "%s", "log files backup");
	snprintf(src, sizeof(src), "%s/%" PRIu32 ".incomplete/log",
	         s->backup_path, s->backup_bsn);
	if (ssunlikely(sl_poolcopy(s->lp, src, &w->dc.c) == -1))
		return -1;

	/* 3. move the backup directory to its final name */
	snprintf(src, sizeof(src), "%s/%" PRIu32 ".incomplete",
	         s->backup_path, s->backup_bsn);
	snprintf(dst, sizeof(dst), "%s/%" PRIu32,
	         s->backup_path, s->backup_bsn);
	if (ssunlikely(ss_vfsrename(s->r->vfs, src, dst) == -1)) {
		sr_error(s->r->e, "backup directory '%s' rename error: %s",
		         src, strerror(errno));
		return -1;
	}

	/* 4. enable log gc */
	sl_poolgc_enable(s->lp, 1);

	/* 5. publish the result and reset scheduler backup state */
	ss_mutexlock(&s->lock);
	s->backup_bsn_last = s->backup_bsn;
	s->backup_bsn_last_complete = 1;
	s->backup_in_progress = 0;
	s->backup = 0;
	s->backup_bsn = 0;
	ss_mutexunlock(&s->lock);
	return 0;
}
예제 #4
0
파일: sc_step.c 프로젝트: pmwkaa/sophia
/*
 * Trace and run sw_managergc() on s->wm.
 * Returns -1 when sw_managergc() returns -1, 0 otherwise.
 */
static inline int
sc_gc(sc *s, scworker *w)
{
	ss_trace(&w->trace, "%s", "log gc");
	if (ssunlikely(sw_managergc(s->wm) == -1))
		return -1;
	return 0;
}
예제 #5
0
/*
 * One scheduler iteration for a worker: ask se_schedule() what to do,
 * then perform the flagged steps in order (log rotation, request
 * dispatch, backup notification, the planned job, log gc) and finish
 * with se_complete().
 *
 * Returns the value of se_schedule() on success, -1 on malfunction.
 * A failed backup job (SI_BACKUP / SI_BACKUPEND) is not a malfunction:
 * it is reported through se_backuperror() and the iteration continues.
 */
int se_scheduler(sescheduler *s, seworker *w)
{
	setask task;
	int job = se_schedule(s, &task, w);

	if (task.rotate && ssunlikely(se_rotate(s, w) == -1))
		goto error;

	if (task.req && ssunlikely(se_dispatch(s, w, &task) == -1))
		goto error;

	if (task.backup_complete) {
		se *env = (se*)s->env;
		se_reqonbackup(env);
	}

	if (job && ssunlikely(se_run(&task, w) == -1)) {
		int backup_job = (task.plan.plan == SI_BACKUP ||
		                  task.plan.plan == SI_BACKUPEND);
		if (!backup_job) {
			se_dbmalfunction(task.db);
			goto error;
		}
		/* backup failure: flag it under the scheduler lock */
		ss_mutexlock(&s->lock);
		se_backuperror(s);
		ss_mutexunlock(&s->lock);
	}

	if (task.gc && ssunlikely(se_gc(s, w) == -1))
		goto error;

	se_complete(s, &task);
	ss_trace(&w->trace, "%s", "sleep");
	return job;

error:
	ss_trace(&w->trace, "%s", "malfunction");
	return -1;
}
예제 #6
0
/*
 * Trace and run sl_poolgc() on the environment log pool.
 * Returns -1 when sl_poolgc() returns -1, 0 otherwise.
 */
static int
se_gc(sescheduler *s, seworker *w)
{
	se *env = (se*)s->env;

	ss_trace(&w->trace, "%s", "log gc");
	if (ssunlikely(sl_poolgc(&env->lp) == -1))
		return -1;
	return 0;
}
예제 #7
0
파일: sc_step.c 프로젝트: pmwkaa/sophia
/*
 * One scheduler iteration for a worker: schedule a task, then perform
 * the flagged steps in order (log rotation, backup completion, the
 * planned job, task end, log gc).
 *
 * Returns the value of sc_schedule() on success, -1 on malfunction.
 * Backup failures (sc_backupend() or a failed SI_BACKUP / SI_BACKUPEND
 * job) only stop the backup via sc_backupstop(); they do not end the
 * iteration.
 */
int sc_step(sc *s, scworker *w, uint64_t vlsn)
{
	sctask task;
	sc_taskbegin(&task, w, vlsn);
	int scheduled = sc_schedule(s, &task);

	/* log rotation */
	if (task.rotate && ssunlikely(sc_rotate(s, w) == -1))
		goto error;

	/* backup completion */
	if (task.backup && ssunlikely(sc_backupend(s, w) == -1))
		sc_backupstop(s);

	/* planned job */
	if (scheduled == 1 && ssunlikely(sc_execute(&task, w, vlsn) == -1)) {
		int backup_job = (task.plan.plan == SI_BACKUP ||
		                  task.plan.plan == SI_BACKUPEND);
		if (!backup_job) {
			sr_statusset(s->r->status, SR_MALFUNCTION);
			goto error;
		}
		sc_backupstop(s);
	}

	sc_taskend(s, &task);

	/* log gc */
	if (task.gc && ssunlikely(sc_gc(s, w) == -1))
		goto error;

	ss_trace(&w->trace, "%s", "sleep");
	return scheduled;

error:
	ss_trace(&w->trace, "%s", "malfunction");
	return -1;
}
예제 #8
0
파일: sc_step.c 프로젝트: pmwkaa/sophia
/*
 * Rotate the log through s->wm, but only when
 * sw_managerrotate_ready() reports non-zero.
 * Returns -1 when sw_managerrotate() returns -1, 0 otherwise.
 */
static inline int
sc_rotate(sc *s, scworker *w)
{
	ss_trace(&w->trace, "%s", "log rotation");
	if (!sw_managerrotate_ready(s->wm))
		return 0;
	if (ssunlikely(sw_managerrotate(s->wm) == -1))
		return -1;
	return 0;
}
예제 #9
0
/*
 * Rotate the environment log pool, but only when
 * sl_poolrotate_ready() reports non-zero for conf.log_rotate_wm.
 * Returns -1 when sl_poolrotate() returns -1, 0 otherwise.
 */
static int
se_rotate(sescheduler *s, seworker *w)
{
	se *env = (se*)s->env;

	ss_trace(&w->trace, "%s", "log rotation");
	if (!sl_poolrotate_ready(&env->lp, env->conf.log_rotate_wm))
		return 0;
	if (ssunlikely(sl_poolrotate(&env->lp) == -1))
		return -1;
	return 0;
}
예제 #10
0
/*
 * Request dispatcher loop.
 *
 * Each pass stops if se_active() returns 0; otherwise it takes a
 * request from se_reqdispatch() and, if one was returned, executes it
 * and marks it ready. When t->req == 1 the loop keeps running (and
 * the flag is passed to se_reqdispatch()); for any other value a
 * single pass is made.
 *
 * Always returns 0.
 */
static int
se_dispatch(sescheduler *s, seworker *w, setask *t)
{
	ss_trace(&w->trace, "%s", "dispatcher");
	se *env = (se*)s->env;
	int blocking = (t->req == 1);

	for (;;) {
		if (ssunlikely(se_active(env) == 0))
			break;
		sereq *req = se_reqdispatch(env, blocking);
		if (req) {
			se_execute(req);
			se_reqready(req);
		}
		if (!blocking)
			break;
	}
	return 0;
}
예제 #11
0
/*
 * Request dispatcher loop.
 *
 * Each pass stops if se_active() returns 0; otherwise it takes a
 * request from se_reqdispatch() and, if one was returned, runs it
 * (only SE_REQREAD is handled, anything else trips an assert) and
 * marks it ready. When t->req == 1 the loop keeps running (and the
 * flag is passed to se_reqdispatch()); for any other value a single
 * pass is made.
 *
 * Always returns 0.
 */
static int
se_dispatch(sescheduler *s, seworker *w, setask *t)
{
	ss_trace(&w->trace, "%s", "dispatcher");
	se *env = (se*)s->env;
	int blocking = (t->req == 1);

	for (;;) {
		if (ssunlikely(se_active(env) == 0))
			break;
		sereq *req = se_reqdispatch(env, blocking);
		if (req) {
			if (req->op == SE_REQREAD)
				se_execute_read(req);
			else
				assert(0);
			se_reqready(req);
		}
		if (!blocking)
			break;
	}
	return 0;
}
예제 #12
0
/*
 * Pick the next unit of background work for a worker.
 *
 * Resets the plan and the task flags, then, holding s->lock, walks
 * the candidate jobs in a fixed order: asynchronous request dispatch,
 * log rotation, checkpoint, zone policy, database shutdown/drop,
 * backup, garbage-collection, index aging, branching and finally
 * compaction. The first candidate that yields work ends the walk.
 *
 * Results are reported through *task:
 *   task->req, task->rotate, task->gc, task->checkpoint_complete,
 *   task->backup_complete - flags set here for the caller
 *   task->plan, task->db  - the selected plan and its database
 *
 * se_schedule_plan() results as used below:
 *   1 - a plan was matched, db is set
 *   2 - work is in progress
 *   0 - nothing left to do for this plan type
 *
 * Returns 1 when a plan was selected (task->db is set and
 * se_dbref(db, 1) has been called), 0 otherwise. s->lock is released
 * on every return path.
 *
 * NOTE(review): se_backupstart() and se_backupcomplete() are called
 * while s->lock is held.
 */
static int
se_schedule(sescheduler *s, setask *task, seworker *w)
{
	ss_trace(&w->trace, "%s", "schedule");
	si_planinit(&task->plan);

	uint64_t now = ss_utime();
	se *e = (se*)s->env;
	sedb *db;
	srzone *zone = se_zoneof(e);
	assert(zone != NULL);

	/* start from a clean task: nothing flagged, no database */
	task->checkpoint_complete = 0;
	task->backup_complete = 0;
	task->rotate = 0;
	task->req = 0;
	task->gc = 0;
	task->db = NULL;

	ss_mutexlock(&s->lock);

	/* asynchronous reqs dispatcher */
	if (s->req == 0) {
		switch (zone->async) {
		case 2:
			/* mode 2: skip the dispatcher when se_reqqueue()
			 * reports 0 */
			if (se_reqqueue(e) == 0)
				break;
			/* fallthrough */
		case 1:
			/* mark the dispatcher as taken and hand it to this
			 * worker; no plan is scheduled in this case */
			s->req = 1;
			task->req = zone->async;
			ss_mutexunlock(&s->lock);
			return 0;
		}
	}

	/* log gc and rotation */
	if (s->rotate == 0)
	{
		task->rotate = 1;
		s->rotate = 1;
	}

	/* checkpoint */
	int in_progress = 0;
	int rc;
checkpoint:
	/* re-entered from the zone policy (mode 2) below after a new
	 * checkpoint has been armed */
	if (s->checkpoint) {
		task->plan.plan = SI_CHECKPOINT;
		task->plan.a = s->checkpoint_lsn;
		rc = se_schedule_plan(s, &task->plan, &db);
		switch (rc) {
		case 1:
			s->workers_branch++;
			se_dbref(db, 1);
			task->db = db;
			task->gc = 1;
			ss_mutexunlock(&s->lock);
			return 1;
		case 2: /* work in progress */
			in_progress = 1;
			break;
		case 0: /* complete checkpoint */
			s->checkpoint = 0;
			s->checkpoint_lsn_last = s->checkpoint_lsn;
			s->checkpoint_lsn = 0;
			task->checkpoint_complete = 1;
			break;
		}
	}

	/* apply zone policy */
	switch (zone->mode) {
	case 0:  /* compact_index */
	case 1:  /* compact_index + branch_count prio */
		/* modes 0 and 1 are not handled here */
		assert(0);
		break;
	case 2:  /* checkpoint */
	{
		if (in_progress) {
			ss_mutexunlock(&s->lock);
			return 0;
		}
		/* arm a new checkpoint at the current LSN and retry */
		uint64_t lsn = sr_seq(&e->seq, SR_LSN);
		s->checkpoint_lsn = lsn;
		s->checkpoint = 1;
		goto checkpoint;
	}
	default: /* branch + compact */
		assert(zone->mode == 3);
	}

	/* database shutdown-drop */
	if (s->workers_gc_db < zone->gc_db_prio) {
		ss_spinlock(&e->dblock);
		db = NULL;
		if (ssunlikely(e->db_shutdown.n > 0)) {
			/* only the first queued database is examined; it is
			 * unlinked from the list when se_dbgarbage()
			 * accepts it */
			db = (sedb*)so_listfirst(&e->db_shutdown);
			if (se_dbgarbage(db)) {
				so_listdel(&e->db_shutdown, &db->o);
			} else {
				db = NULL;
			}
		}
		ss_spinunlock(&e->dblock);
		if (ssunlikely(db)) {
			if (db->dropped)
				task->plan.plan = SI_DROP;
			else
				task->plan.plan = SI_SHUTDOWN;
			s->workers_gc_db++;
			se_dbref(db, 1);
			task->db = db;
			ss_mutexunlock(&s->lock);
			return 1;
		}
	}

	/* backup */
	if (s->backup && (s->workers_backup < zone->backup_prio))
	{
		/* backup procedure.
		 *
		 * state 0 (start)
		 * -------
		 *
		 * a. disable log gc
		 * b. mark to start backup (state 1)
		 *
		 * state 1 (background, delayed start)
		 * -------
		 *
		 * a. create backup_path/<bsn.incomplete> directory
		 * b. create database directories
		 * c. create log directory
		 * d. state 2
		 *
		 * state 2 (background, copy)
		 * -------
		 *
		 * a. schedule and execute node backup which bsn < backup_bsn
		 * b. state 3
		 *
		 * state 3 (background, completion)
		 * -------
		 *
		 * a. rotate log file
		 * b. copy log files
		 * c. enable log gc, schedule gc
		 * d. rename <bsn.incomplete> into <bsn>
		 * e. set last backup, set COMPLETE
		 *
		*/
		if (s->backup == 1) {
			/* state 1 */
			rc = se_backupstart(s);
			if (ssunlikely(rc == -1)) {
				se_backuperror(s);
				goto backup_error;
			}
			s->backup = 2;
		}
		/* state 2 */
		task->plan.plan = SI_BACKUP;
		task->plan.a = s->backup_bsn;
		rc = se_schedule_plan(s, &task->plan, &db);
		switch (rc) {
		case 1:
			s->workers_backup++;
			se_dbref(db, 1);
			task->db = db;
			ss_mutexunlock(&s->lock);
			return 1;
		case 2: /* work in progress */
			break;
		case 0: /* state 3 */
			rc = se_backupcomplete(s, w);
			if (ssunlikely(rc == -1)) {
				se_backuperror(s);
				goto backup_error;
			}
			s->backup_events++;
			task->gc = 1;
			task->backup_complete = 1;
			break;
		}
		/* a failed backup step does not stop scheduling: the
		 * remaining job types below are still considered */
backup_error:;
	}

	/* garbage-collection */
	if (s->gc) {
		if (s->workers_gc < zone->gc_prio) {
			task->plan.plan = SI_GC;
			task->plan.a = sx_vlsn(&e->xm);
			task->plan.b = zone->gc_wm;
			rc = se_schedule_plan(s, &task->plan, &db);
			switch (rc) {
			case 1:
				s->workers_gc++;
				se_dbref(db, 1);
				task->db = db;
				ss_mutexunlock(&s->lock);
				return 1;
			case 2: /* work in progress */
				break;
			case 0: /* plan returned 0: clear the gc flag and remember the time */
				s->gc = 0;
				s->gc_last = now;
				break;
			}
		}
	} else {
		/* periodic trigger: raise the gc flag once gc_period has
		 * elapsed since gc_last; it takes effect on a later call */
		if (zone->gc_prio && zone->gc_period) {
			if ( (now - s->gc_last) >= ((uint64_t)zone->gc_period * 1000000) ) {
				s->gc = 1;
			}
		}
	}

	/* index aging */
	if (s->age) {
		if (s->workers_branch < zone->branch_prio) {
			task->plan.plan = SI_AGE;
			/* NOTE(review): the original comment here said "ms";
			 * the same 1000000 factor is applied to the periods
			 * compared against ss_utime() deltas, which suggests
			 * seconds -> microseconds. Confirm the unit. */
			task->plan.a = zone->branch_age * 1000000;
			task->plan.b = zone->branch_age_wm;
			rc = se_schedule_plan(s, &task->plan, &db);
			switch (rc) {
			case 1:
				s->workers_branch++;
				se_dbref(db, 1);
				task->db = db;
				ss_mutexunlock(&s->lock);
				return 1;
			case 0:
				s->age = 0;
				s->age_last = now;
				break;
			}
		}
	} else {
		/* periodic trigger: raise the age flag once
		 * branch_age_period has elapsed since age_last */
		if (zone->branch_prio && zone->branch_age_period) {
			if ( (now - s->age_last) >= ((uint64_t)zone->branch_age_period * 1000000) ) {
				s->age = 1;
			}
		}
	}

	/* branching */
	if (s->workers_branch < zone->branch_prio)
	{
		/* schedule branch task using following
		 * priority:
		 *
		 * a. peek node with the largest in-memory index
		 *    which is equal or greater than branch
		 *    watermark.
		 *    If nothing is found, stick to b.
		 *
		 * b. peek node with the largest in-memory index,
		 *    which has oldest update time.
		 *
		 * c. if no branch work is needed, schedule a
		 *    compaction job
		 *
		 */
		task->plan.plan = SI_BRANCH;
		task->plan.a = zone->branch_wm;
		rc = se_schedule_plan(s, &task->plan, &db);
		if (rc == 1) {
			s->workers_branch++;
			se_dbref(db, 1);
			task->db = db;
			task->gc = 1;
			ss_mutexunlock(&s->lock);
			return 1;
		}
	}

	/* compaction */
	/* last resort; unlike the jobs above it is not gated by a
	 * worker counter */
	task->plan.plan = SI_COMPACT;
	task->plan.a = zone->compact_wm;
	task->plan.b = zone->compact_mode;
	rc = se_schedule_plan(s, &task->plan, &db);
	if (rc == 1) {
		se_dbref(db, 1);
		task->db = db;
		ss_mutexunlock(&s->lock);
		return 1;
	}

	/* nothing to do */
	ss_mutexunlock(&s->lock);
	return 0;
}
예제 #13
0
파일: sc_step.c 프로젝트: pmwkaa/sophia
/*
 * Try to plan one job for task->db.
 *
 * Job types are tried in a fixed order: checkpoint, delayed node gc,
 * backup, expire, garbage-collection, compaction. The first type
 * whose planner call returns SI_PMATCH wins and SI_PMATCH is
 * returned with task->plan filled in. For the checkpoint, backup,
 * expire and gc stages SI_PNONE runs that stage's "done" hook,
 * while SI_PRETRY just moves on to the next stage.
 *
 * When nothing matches, task->plan is re-initialised and SI_PNONE
 * is returned.
 */
static inline siplannerrc
sc_do(sc *s, sctask *task)
{
	scdb *db = task->db;
	sicompaction *compaction = &db->index->scheme.compaction;
	siplannerrc rc;

	ss_trace(&task->w->trace, "%s", "schedule");

	/* checkpoint */
	if (db->checkpoint) {
		task->plan.plan = SI_CHECKPOINT;
		task->plan.a = db->checkpoint_vlsn;
		rc = si_plan(db->index, &task->plan);
		if (rc == SI_PMATCH)
			return rc;
		if (rc == SI_PNONE)
			sc_task_checkpoint_done(db, task->time);
	}

	/* node delayed gc */
	task->plan.plan = SI_NODEGC;
	rc = si_plan(db->index, &task->plan);
	if (rc == SI_PMATCH)
		return rc;

	/* backup
	 *
	 * Overall procedure, for reference:
	 *
	 *   state 0 (start):       disable log gc, mark backup to start
	 *   state 1 (delayed):     create backup_path/<bsn.incomplete>,
	 *                          the database directories and the log
	 *                          directory
	 *   state 2 (copy):        schedule and execute node backup
	 *                          which bsn < backup_bsn
	 *   state 3 (completion):  rotate log file, copy log files,
	 *                          enable log gc and schedule gc, rename
	 *                          <bsn.incomplete> into <bsn>, set last
	 *                          backup, set COMPLETE
	 *
	 * This function handles state 2 and flags state 3.
	 */
	if (db->backup) {
		task->plan.plan = SI_BACKUP;
		task->plan.a = s->backup_bsn;
		rc = sc_plan(s, task, SC_QBACKUP);
		if (rc == SI_PMATCH) {
			db->workers[SC_QBACKUP]++;
			task->db = db;
			return SI_PMATCH;
		}
		if (rc == SI_PNONE) {
			sc_task_backup_done(task->db);
			assert(s->backup_in_progress > 0);
			s->backup_in_progress--;
			/* the last database has finished: request state 3 */
			if (s->backup_in_progress == 0)
				task->backup = 1;
		}
	}

	/* expire */
	if (db->expire) {
		task->plan.plan = SI_EXPIRE;
		task->plan.a = db->index->scheme.expire;
		rc = sc_plan(s, task, SC_QEXPIRE);
		if (rc == SI_PMATCH) {
			db->workers[SC_QEXPIRE]++;
			return SI_PMATCH;
		}
		if (rc == SI_PNONE)
			sc_task_expire_done(db, task->time);
	}

	/* garbage-collection */
	if (db->gc) {
		task->plan.plan = SI_GC;
		task->plan.a = task->vlsn;
		task->plan.b = compaction->gc_wm;
		rc = sc_plan(s, task, SC_QGC);
		if (rc == SI_PMATCH) {
			db->workers[SC_QGC]++;
			return SI_PMATCH;
		}
		if (rc == SI_PNONE)
			sc_task_gc_done(db, task->time);
	}

	/* compaction */
	task->plan.plan = SI_COMPACTION;
	rc = si_plan(db->index, &task->plan);
	if (rc == SI_PMATCH)
		return SI_PMATCH;

	/* nothing to do: hand back a clean plan */
	si_planinit(&task->plan);
	return SI_PNONE;
}