Esempio n. 1
0
/*
 * __cursor_var_append_next --
 *	Return the next variable-length entry on the append list.
 */
static inline int
__cursor_var_append_next(WT_CURSOR_BTREE *cbt, bool newpage)
{
	WT_SESSION_IMPL *session;
	WT_UPDATE *upd;

	session = (WT_SESSION_IMPL *)cbt->iface.session;

	if (newpage) {
		cbt->ins = WT_SKIP_FIRST(cbt->ins_head);
		goto new_page;
	}

	for (;;) {
		cbt->ins = WT_SKIP_NEXT(cbt->ins);
new_page:	if (cbt->ins == NULL)
			return (WT_NOTFOUND);

		__cursor_set_recno(cbt, WT_INSERT_RECNO(cbt->ins));
		if ((upd = __wt_txn_read(session, cbt->ins->upd)) == NULL)
			continue;
		if (upd->type == WT_UPDATE_DELETED) {
			if (__wt_txn_upd_visible_all(session, upd))
				++cbt->page_deleted_count;
			continue;
		}
		return (__wt_value_return(session, cbt, upd));
	}
	/* NOTREACHED */
}
Esempio n. 2
0
/*
 * __cursor_var_append_next --
 *	Return the next variable-length entry on the append list.
 */
static inline int
__cursor_var_append_next(WT_CURSOR_BTREE *cbt, int newpage)
{
	WT_ITEM *val;
	WT_SESSION_IMPL *session;
	WT_UPDATE *upd;

	session = (WT_SESSION_IMPL *)cbt->iface.session;
	val = &cbt->iface.value;

	if (newpage) {
		cbt->ins = WT_SKIP_FIRST(cbt->ins_head);
		goto new_page;
	}

	for (;;) {
		cbt->ins = WT_SKIP_NEXT(cbt->ins);
new_page:	if (cbt->ins == NULL)
			return (WT_NOTFOUND);

		__cursor_set_recno(cbt, WT_INSERT_RECNO(cbt->ins));
		if ((upd = __wt_txn_read(session, cbt->ins->upd)) == NULL ||
		    WT_UPDATE_DELETED_ISSET(upd))
			continue;
		val->data = WT_UPDATE_DATA(upd);
		val->size = upd->size;
		break;
	}
	return (0);
}
Esempio n. 3
0
/*btree cursor移向下一个记录,仅仅在append list上移动*/
static inline int __cursor_fix_append_next(WT_CURSOR_BTREE* cbt, int newpage)
{
	WT_ITEM *val;
	WT_SESSION_IMPL *session;
	WT_UPDATE *upd;

	session = (WT_SESSION_IMPL *)cbt->iface.session;
	val = &cbt->iface.value;

	/*新载入的page,判断ins_head是否为空,如果为空表示没有append的记录*/
	if (newpage){
		if ((cbt->ins = WT_SKIP_FIRST(cbt->ins_head)) == NULL)
			return (WT_NOTFOUND);
	}
	else{
		/*已经到append list的最后一条记录了,后面没有记录*/
		if (cbt->recno >= WT_INSERT_RECNO(cbt->ins) && (cbt->ins = WT_SKIP_NEXT(cbt->ins)) == NULL)
			return (WT_NOTFOUND);
	}
	/*
	* This code looks different from the cursor-previous code.  The append
	* list appears on the last page of the tree, but it may be preceded by
	* other rows, which means the cursor's recno will be set to a value and
	* we simply want to increment it.  If the cursor's recno is NOT set,
	* we're starting our iteration in a tree that has only appended items.
	* In that case, recno will be 0 and happily enough the increment will
	* set it to 1, which is correct.
	*/
	__cursor_set_recno(cbt, cbt->recno + 1);

	/*
	* Fixed-width column store appends are inherently non-transactional.
	* Even a non-visible update by a concurrent or aborted transaction
	* changes the effective end of the data.  The effect is subtle because
	* of the blurring between deleted and empty values, but ideally we
	* would skip all uncommitted changes at the end of the data.  This
	* doesn't apply to variable-width column stores because the implicitly
	* created records written by reconciliation are deleted and so can be
	* never seen by a read.
	*
	* The problem is that we don't know at this point whether there may be
	* multiple uncommitted changes at the end of the data, and it would be
	* expensive to check every time we hit an aborted update.  If an
	* insert is aborted, we simply return zero (empty), regardless of
	* whether we are at the end of the data.
	*/
	if (cbt->recno < WT_INSERT_RECNO(cbt->ins) || (upd = __wt_txn_read(session, cbt->ins->upd)) == NULL){ /*没有可见的记录值,直接返回0*/
		cbt->v = 0;
		val->data = &cbt->v;
	}
	else
		val->data = WT_UPDATE_DATA(upd);
	val->size = 1;
	return 0;
}
Esempio n. 4
0
/*
 * __free_skip_list --
 *	Walk a WT_INSERT forward-linked list and free the per-thread combination
 * of a WT_INSERT structure and its associated chain of WT_UPDATE structures.
 */
static void
__free_skip_list(WT_SESSION_IMPL *session, WT_INSERT *ins)
{
	WT_INSERT *next;

	do {
		__free_update_list(session, ins->upd);

		next = WT_SKIP_NEXT(ins);
		__wt_free(session, ins);
	} while ((ins = next) != NULL);
}
Esempio n. 5
0
/*在append list上移动variable-length类型的btree cursor*/
static inline int __cursor_var_append_next(WT_CURSOR_BTREE* cbt, int newpage)
{
	WT_ITEM *val;
	WT_SESSION_IMPL *session;
	WT_UPDATE *upd;

	session = (WT_SESSION_IMPL *)cbt->iface.session;
	val = &cbt->iface.value;

	if (newpage){
		cbt->ins = WT_SKIP_FIRST(cbt->ins_head);
		goto new_page;
	}

	for (;;){
		cbt->ins = WT_SKIP_NEXT(cbt->ins);
new_page:
		if (cbt->ins == NULL)
			return (WT_NOTFOUND);

		__cursor_set_recno(cbt, WT_INSERT_RECNO(cbt->ins));
		/*事务隔离读,对本事务不可见,继续向前*/
		if ((upd = __wt_txn_read(session, cbt->ins->upd)) == NULL)
			continue;
		/*删除集合,不做指向这条记录,继续向下移动*/
		if (WT_UPDATE_DELETED_ISSET(upd)) {
			++cbt->page_deleted_count;
			continue;
		}
		/*赋值value*/
		val->data = WT_UPDATE_DATA(upd);
		val->size = upd->size;

		return 0;
	}
}
Esempio n. 6
0
/*
 * __cursor_row_next --
 *	Move to the next row-store item.
 */
static inline int
__cursor_row_next(WT_CURSOR_BTREE *cbt, bool newpage)
{
	WT_INSERT *ins;
	WT_ITEM *key;
	WT_PAGE *page;
	WT_ROW *rip;
	WT_SESSION_IMPL *session;
	WT_UPDATE *upd;

	session = (WT_SESSION_IMPL *)cbt->iface.session;
	page = cbt->ref->page;
	key = &cbt->iface.key;

	/*
	 * For row-store pages, we need a single item that tells us the part
	 * of the page we're walking (otherwise switching from next to prev
	 * and vice-versa is just too complicated), so we map the WT_ROW and
	 * WT_INSERT_HEAD insert array slots into a single name space: slot 1
	 * is the "smallest key insert list", slot 2 is WT_ROW[0], slot 3 is
	 * WT_INSERT_HEAD[0], and so on.  This means WT_INSERT lists are
	 * odd-numbered slots, and WT_ROW array slots are even-numbered slots.
	 *
	 * Initialize for each new page.
	 */
	if (newpage) {
		cbt->ins_head = WT_ROW_INSERT_SMALLEST(page);
		cbt->ins = WT_SKIP_FIRST(cbt->ins_head);
		cbt->row_iteration_slot = 1;
		cbt->rip_saved = NULL;
		goto new_insert;
	}

	/* Move to the next entry and return the item. */
	for (;;) {
		/*
		 * Continue traversing any insert list; maintain the insert list
		 * head reference and entry count in case we switch to a cursor
		 * previous movement.
		 */
		if (cbt->ins != NULL)
			cbt->ins = WT_SKIP_NEXT(cbt->ins);

new_insert:	if ((ins = cbt->ins) != NULL) {
			if ((upd = __wt_txn_read(session, ins->upd)) == NULL)
				continue;
			if (upd->type == WT_UPDATE_DELETED) {
				if (__wt_txn_upd_visible_all(session, upd))
					++cbt->page_deleted_count;
				continue;
			}
			key->data = WT_INSERT_KEY(ins);
			key->size = WT_INSERT_KEY_SIZE(ins);
			return (__wt_value_return(session, cbt, upd));
		}

		/* Check for the end of the page. */
		if (cbt->row_iteration_slot >= page->entries * 2 + 1)
			return (WT_NOTFOUND);
		++cbt->row_iteration_slot;

		/*
		 * Odd-numbered slots configure as WT_INSERT_HEAD entries,
		 * even-numbered slots configure as WT_ROW entries.
		 */
		if (cbt->row_iteration_slot & 0x01) {
			cbt->ins_head = WT_ROW_INSERT_SLOT(
			    page, cbt->row_iteration_slot / 2 - 1);
			cbt->ins = WT_SKIP_FIRST(cbt->ins_head);
			goto new_insert;
		}
		cbt->ins_head = NULL;
		cbt->ins = NULL;

		cbt->slot = cbt->row_iteration_slot / 2 - 1;
		rip = &page->pg_row[cbt->slot];
		upd = __wt_txn_read(session, WT_ROW_UPDATE(page, rip));
		if (upd != NULL && upd->type == WT_UPDATE_DELETED) {
			if (__wt_txn_upd_visible_all(session, upd))
				++cbt->page_deleted_count;
			continue;
		}
		return (__cursor_row_slot_return(cbt, rip, upd));
	}
	/* NOTREACHED */
}
Esempio n. 7
0
/*移向行存储的下一个行对象*/
static inline int __cursor_row_next(WT_CURSOR_BTREE* cbt, int newpage)
{
	WT_INSERT *ins;
	WT_ITEM *key, *val;
	WT_PAGE *page;
	WT_ROW *rip;
	WT_SESSION_IMPL *session;
	WT_UPDATE *upd;

	session = (WT_SESSION_IMPL *)cbt->iface.session;
	page = cbt->ref->page;
	key = &cbt->iface.key;
	val = &cbt->iface.value;

	/*假如是newpage,定位到insert修改队列的头位置*/
	if (newpage){
		cbt->ins_head = WT_ROW_INSERT_SMALLEST(page);
		cbt->ins = WT_SKIP_FIRST(cbt->ins_head);
		cbt->row_iteration_slot = 1;
		goto new_insert;
	}

	for (;;){
		if (cbt->ins != NULL)
			cbt->ins = WT_SKIP_NEXT(cbt->ins);

new_insert:
		if ((ins = cbt->ins) != NULL) {
			/*事务可见数据读取*/
			if ((upd = __wt_txn_read(session, ins->upd)) == NULL)
				continue;
			/*判断是否删除,如果删除,跳过被删除的对象*/
			if (WT_UPDATE_DELETED_ISSET(upd)) {
				++cbt->page_deleted_count;
				continue;
			}

			key->data = WT_INSERT_KEY(ins);
			key->size = WT_INSERT_KEY_SIZE(ins);
			val->data = WT_UPDATE_DATA(upd);
			val->size = upd->size;
			return 0;
		}

		/*检索page row entires数组, 到了page的末尾*/
		if (cbt->row_iteration_slot >= page->pg_row_entries * 2 + 1)
			return (WT_NOTFOUND);
		++cbt->row_iteration_slot;

		/*
		* Odd-numbered slots configure as WT_INSERT_HEAD entries,
		* even-numbered slots configure as WT_ROW entries.
		*/
		if (cbt->row_iteration_slot & 0x01) {
			cbt->ins_head = WT_ROW_INSERT_SLOT(page, cbt->row_iteration_slot / 2 - 1);
			cbt->ins = WT_SKIP_FIRST(cbt->ins_head);
			goto new_insert;
		}

		cbt->ins_head = NULL;
		cbt->ins = NULL;

		/*计算定位slot*/
		cbt->slot = cbt->row_iteration_slot / 2 - 1;
		rip = &page->pg_row_d[cbt->slot];

		upd = __wt_txn_read(session, WT_ROW_UPDATE(page, rip));
		if (upd != NULL && WT_UPDATE_DELETED_ISSET(upd)) {
			++cbt->page_deleted_count;
			continue;
		}

		return __cursor_row_slot_return(cbt, rip, upd);
	}
}