Ejemplo n.º 1
0
/*
 * __cursor_var_prev --
 *	Move to the previous, variable-length column-store item.
 */
static inline int
__cursor_var_prev(WT_CURSOR_BTREE *cbt, bool newpage)
{
	WT_CELL *cell;
	WT_CELL_UNPACK unpack;
	WT_COL *cip;
	WT_INSERT *ins;
	WT_ITEM *val;
	WT_PAGE *page;
	WT_SESSION_IMPL *session;
	WT_UPDATE *upd;
	uint64_t rle_start;

	session = (WT_SESSION_IMPL *)cbt->iface.session;
	page = cbt->ref->page;
	val = &cbt->iface.value;

	rle_start = 0;			/* -Werror=maybe-uninitialized */

	/* Initialize for each new page. */
	if (newpage) {
		cbt->last_standard_recno = __col_var_last_recno(page);
		if (cbt->last_standard_recno == 0)
			return (WT_NOTFOUND);
		__cursor_set_recno(cbt, cbt->last_standard_recno);
		goto new_page;
	}

	/* Move to the previous entry and return the item. */
	for (;;) {
		__cursor_set_recno(cbt, cbt->recno - 1);

new_page:	if (cbt->recno < page->pg_var_recno)
			return (WT_NOTFOUND);

		/* Find the matching WT_COL slot. */
		if ((cip =
		    __col_var_search(page, cbt->recno, &rle_start)) == NULL)
			return (WT_NOTFOUND);
		cbt->slot = WT_COL_SLOT(page, cip);

		/* Check any insert list for a matching record. */
		cbt->ins_head = WT_COL_UPDATE_SLOT(page, cbt->slot);
		cbt->ins = __col_insert_search_match(cbt->ins_head, cbt->recno);
		upd = cbt->ins == NULL ?
		    NULL : __wt_txn_read(session, cbt->ins->upd);
		if (upd != NULL) {
			if (WT_UPDATE_DELETED_ISSET(upd)) {
				if (__wt_txn_visible_all(session, upd->txnid))
					++cbt->page_deleted_count;
				continue;
			}

			val->data = WT_UPDATE_DATA(upd);
			val->size = upd->size;
			return (0);
		}

		/*
		 * If we're at the same slot as the last reference and there's
		 * no matching insert list item, re-use the return information
		 * (so encoded items with large repeat counts aren't repeatedly
		 * decoded).  Otherwise, unpack the cell and build the return
		 * information.
		 */
		if (cbt->cip_saved != cip) {
			if ((cell = WT_COL_PTR(page, cip)) == NULL)
				continue;
			__wt_cell_unpack(cell, &unpack);
			if (unpack.type == WT_CELL_DEL) {
				if (__wt_cell_rle(&unpack) == 1)
					continue;
				/*
				 * There can be huge gaps in the variable-length
				 * column-store name space appearing as deleted
				 * records. If more than one deleted record, do
				 * the work of finding the next record to return
				 * instead of looping through the records.
				 *
				 * First, find the largest record in the update
				 * list that's smaller than the current record.
				 */
				ins = __col_insert_search_lt(
				    cbt->ins_head, cbt->recno);

				/*
				 * Second, for records with RLEs greater than 1,
				 * the above call to __col_var_search located
				 * this record in the page's list of repeating
				 * records, and returned the starting record.
				 * The starting record - 1 is the record to
				 * which we could skip, if there was no larger
				 * record in the update list.
				 */
				cbt->recno = rle_start - 1;
				if (ins != NULL &&
				    WT_INSERT_RECNO(ins) > cbt->recno)
					cbt->recno = WT_INSERT_RECNO(ins);

				/* Adjust for the outer loop decrement. */
				++cbt->recno;
				continue;
			}
			WT_RET(__wt_page_cell_data_ref(
			    session, page, &unpack, cbt->tmp));

			cbt->cip_saved = cip;
		}
		val->data = cbt->tmp->data;
		val->size = cbt->tmp->size;
		return (0);
	}
	/* NOTREACHED */
}
Ejemplo n.º 2
0
/*
 * __cursor_var_prev --
 *	Move to the previous, variable-length column-store item.
 */
static inline int
__cursor_var_prev(WT_CURSOR_BTREE *cbt, int newpage)
{
	WT_CELL *cell;
	WT_CELL_UNPACK unpack;
	WT_COL *cip;
	WT_ITEM *val;
	WT_PAGE *page;
	WT_SESSION_IMPL *session;
	WT_UPDATE *upd;

	session = (WT_SESSION_IMPL *)cbt->iface.session;
	page = cbt->ref->page;
	val = &cbt->iface.value;

	/* Initialize for each new page. */
	if (newpage) {
		cbt->last_standard_recno = __col_var_last_recno(page);
		if (cbt->last_standard_recno == 0)
			return (WT_NOTFOUND);
		__cursor_set_recno(cbt, cbt->last_standard_recno);
		goto new_page;
	}

	/* Move to the previous entry and return the item. */
	for (;;) {
		__cursor_set_recno(cbt, cbt->recno - 1);

new_page:	if (cbt->recno < page->pg_var_recno)
			return (WT_NOTFOUND);

		/* Find the matching WT_COL slot. */
		if ((cip = __col_var_search(page, cbt->recno)) == NULL)
			return (WT_NOTFOUND);
		cbt->slot = WT_COL_SLOT(page, cip);

		/* Check any insert list for a matching record. */
		cbt->ins_head = WT_COL_UPDATE_SLOT(page, cbt->slot);
		cbt->ins = __col_insert_search_match(cbt->ins_head, cbt->recno);
		upd = cbt->ins == NULL ?
		    NULL : __wt_txn_read(session, cbt->ins->upd);
		if (upd != NULL) {
			if (WT_UPDATE_DELETED_ISSET(upd)) {
				++cbt->page_deleted_count;
				continue;
			}

			val->data = WT_UPDATE_DATA(upd);
			val->size = upd->size;
			return (0);
		}

		/*
		 * If we're at the same slot as the last reference and there's
		 * no matching insert list item, re-use the return information
		 * (so encoded items with large repeat counts aren't repeatedly
		 * decoded).  Otherwise, unpack the cell and build the return
		 * information.
		 */
		if (cbt->cip_saved != cip) {
			if ((cell = WT_COL_PTR(page, cip)) == NULL)
				continue;
			__wt_cell_unpack(cell, &unpack);
			if (unpack.type == WT_CELL_DEL)
				continue;
			WT_RET(__wt_page_cell_data_ref(
			    session, page, &unpack, &cbt->tmp));

			cbt->cip_saved = cip;
		}
		val->data = cbt->tmp.data;
		val->size = cbt->tmp.size;
		return (0);
	}
	/* NOTREACHED */
}
Ejemplo n.º 3
0
/*移向下条variable-length column-store 记录*/
static inline int __cursor_var_next(WT_CURSOR_BTREE* cbt, int newpage)
{
	WT_CELL *cell;
	WT_CELL_UNPACK unpack;
	WT_COL *cip;
	WT_ITEM *val;
	WT_INSERT *ins;
	WT_PAGE *page;
	WT_SESSION_IMPL *session;
	WT_UPDATE *upd;
	uint64_t rle, rle_start;

	session = (WT_SESSION_IMPL *)cbt->iface.session;
	page = cbt->ref->page;
	val = &cbt->iface.value;

	rle_start = 0;			/* -Werror=maybe-uninitialized */

	if (newpage){
		cbt->last_standard_recno = __col_var_last_recno(page);
		if (cbt->last_standard_recno == 0)
			return (WT_NOTFOUND);
		__cursor_set_recno(cbt, page->pg_var_recno);
		goto new_page;
	}

	for (;;){
		if (cbt->recno >= cbt->last_standard_recno)
			return (WT_NOTFOUND);
		__cursor_set_recno(cbt, cbt->recno + 1);

	new_page:
		/*定位到recno对应的WT_COL slot*/
		if ((cip = __col_var_search(page, cbt->recno, &rle_start)) == NULL)
			return (WT_NOTFOUND);
		cbt->slot = WT_COL_SLOT(page, cip);

		/*读取内容值*/
		cbt->ins_head = WT_COL_UPDATE_SLOT(page, cbt->slot);
		cbt->ins = __col_insert_search_match(cbt->ins_head, cbt->recno);
		upd = cbt->ins == NULL ? NULL : __wt_txn_read(session, cbt->ins->upd);
		if (upd != NULL) {
			if (WT_UPDATE_DELETED_ISSET(upd)) {
				++cbt->page_deleted_count;
				continue;
			}

			val->data = WT_UPDATE_DATA(upd);
			val->size = upd->size;
			return (0);
		}

		/*
		* If we're at the same slot as the last reference and there's
		* no matching insert list item, re-use the return information
		* (so encoded items with large repeat counts aren't repeatedly
		* decoded).  Otherwise, unpack the cell and build the return
		* information.
		* upd == NULL, 记录可能被删除放入到了insert列表中,slot可能被重用了,那么需要进行cell unpack取值
		*/

		if (cbt->cip_saved != cip) {
			if ((cell = WT_COL_PTR(page, cip)) == NULL)
				continue;
			__wt_cell_unpack(cell, &unpack);
			if (unpack.type == WT_CELL_DEL) {
				if ((rle = __wt_cell_rle(&unpack)) == 1)
					continue;

				/*定位到修改列表中的记录*/
				ins = __col_insert_search_gt(cbt->ins_head, cbt->recno);
				cbt->recno = rle_start + rle;
				if (ins != NULL && WT_INSERT_RECNO(ins) < cbt->recno)
					cbt->recno = WT_INSERT_RECNO(ins);

				/* Adjust for the outer loop increment. */
				--cbt->recno;
				continue;
			}

			/*取出cell中的值到tmp中*/
			WT_RET(__wt_page_cell_data_ref(session, page, &unpack, &cbt->tmp));
			cbt->cip_saved = cip;
		}
		val->data = cbt->tmp.data;
		val->size = cbt->tmp.size;
		return 0;
	}
}
Ejemplo n.º 4
0
int __wt_kv_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd)
{
	WT_BTREE *btree;
	WT_CELL *cell;
	WT_CELL_UNPACK unpack;
	WT_CURSOR *cursor;
	WT_PAGE *page;
	WT_ROW *rip;
	uint8_t v;

	switch (page->type){
	case WT_PAGE_COL_FIX:
		cursor->recno = cbt->recno;
		/*cursor对应的是一个upd,直接返回value*/
		if (upd != NULL){
			cursor->value.data = WT_UPDATE_DATA(upd);
			cursor->value.size = upd->size;
			return 0;
		}

		v = __bit_getv_recno(page, cbt->iface.recno, btree->bitcnt);
		return __wt_buf_set(session, &cursor->value, &v, 1);
		
	case WT_PAGE_COL_VAR:
		cursor->recno = cbt->recno;

		if (upd != NULL) {
			cursor->value.data = WT_UPDATE_DATA(upd);
			cursor->value.size = upd->size;
			return (0);
		}
		/*获得对应的cell,并通过cell得到K/V值*/
		cell = WT_COL_PTR(page, &page->pg_var_d[cbt->slot]);
		break;

	case WT_PAGE_ROW_LEAF:
		rip = &page->pg_row_d[cbt->slot];

		if (cbt->ins != NULL){ /*插入的k/v对*/
			cursor->key.data = WT_INSERT_KEY(cbt->ins);
			cursor->key.size = WT_INSERT_KEY_SIZE(cbt->ins);
		}
		else if (cbt->compare == 0){/*比较器定位到了对应的k/v对*/
			cursor->key.data = cbt->search_key.data;
			cursor->key.size = cbt->search_key.size;
		}
		else
			WT_RET(__wt_row_leaf_key(session, page, rip, &cursor->key, 0)); /*设置key的值*/

		/*值是在append/update list当中,从当中取*/
		if (upd != NULL) {
			cursor->value.data = WT_UPDATE_DATA(upd);
			cursor->value.size = upd->size;
			return (0);
		}
		/*可以直接通过rip指针获得value,K/V是存储在cell空间之内*/
		if (__wt_row_leaf_value(page, rip, &cursor->value))
			return 0;

		/*不是连续存储的,需要通过解析cell来定位到value*/
		if (cell = __wt_row_leaf_value_cell(page, rip, NULL) == NULL){
			cursor->value.size = 0;
			return 0;
		}
		break;

		WT_ILLEGAL_VALUE(session);
	}
	/*通过cell解析到对应的value值, ovfl item*/
	__wt_cell_unpack(cell, &unpack);
	WT_RET(__wt_page_cell_data_ref(session, page, &unpack, &cursor->value));

	return 0;
}
Ejemplo n.º 5
0
/*
 * __wt_kv_return --
 *	Return a page referenced key/value pair to the application.
 */
int
__wt_kv_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd)
{
	WT_BTREE *btree;
	WT_CELL *cell;
	WT_CELL_UNPACK unpack;
	WT_CURSOR *cursor;
	WT_ITEM *tmp;
	WT_PAGE *page;
	WT_ROW *rip;
	uint8_t v;

	btree = S2BT(session);

	page = cbt->ref->page;
	cursor = &cbt->iface;

	switch (page->type) {
	case WT_PAGE_COL_FIX:
		/*
		 * The interface cursor's record has usually been set, but that
		 * isn't universally true, specifically, cursor.search_near may
		 * call here without first setting the interface cursor.
		 */
		cursor->recno = cbt->recno;

		/* If the cursor references a WT_UPDATE item, return it. */
		if (upd != NULL) {
			cursor->value.data = WT_UPDATE_DATA(upd);
			cursor->value.size = upd->size;
			return (0);
		}

		/* Take the value from the original page. */
		v = __bit_getv_recno(page, cbt->iface.recno, btree->bitcnt);
		return (__wt_buf_set(session, &cursor->value, &v, 1));
	case WT_PAGE_COL_VAR:
		/*
		 * The interface cursor's record has usually been set, but that
		 * isn't universally true, specifically, cursor.search_near may
		 * call here without first setting the interface cursor.
		 */
		cursor->recno = cbt->recno;

		/* If the cursor references a WT_UPDATE item, return it. */
		if (upd != NULL) {
			cursor->value.data = WT_UPDATE_DATA(upd);
			cursor->value.size = upd->size;
			return (0);
		}

		/* Take the value from the original page cell. */
		cell = WT_COL_PTR(page, &page->pg_var_d[cbt->slot]);
		break;
	case WT_PAGE_ROW_LEAF:
		rip = &page->pg_row_d[cbt->slot];

		/*
		 * If the cursor references a WT_INSERT item, take its key.
		 * Else, if we have an exact match, we copied the key in the
		 * search function, take it from there.
		 * If we don't have an exact match, take the key from the
		 * original page.
		 */
		if (cbt->ins != NULL) {
			cursor->key.data = WT_INSERT_KEY(cbt->ins);
			cursor->key.size = WT_INSERT_KEY_SIZE(cbt->ins);
		} else if (cbt->compare == 0) {
			/*
			 * If not in an insert list and there's an exact match,
			 * the row-store search function built the key we want
			 * to return in the cursor's temporary buffer. Swap the
			 * cursor's search-key and temporary buffers so we can
			 * return it (it's unsafe to return the temporary buffer
			 * itself because our caller might do another search in
			 * this table using the key we return, and we'd corrupt
			 * the search key during any subsequent search that used
			 * the temporary buffer.
			 */
			tmp = cbt->row_key;
			cbt->row_key = cbt->tmp;
			cbt->tmp = tmp;

			cursor->key.data = cbt->row_key->data;
			cursor->key.size = cbt->row_key->size;
		} else
			WT_RET(__wt_row_leaf_key(
			    session, page, rip, &cursor->key, false));

		/* If the cursor references a WT_UPDATE item, return it. */
		if (upd != NULL) {
			cursor->value.data = WT_UPDATE_DATA(upd);
			cursor->value.size = upd->size;
			return (0);
		}

		/* Simple values have their location encoded in the WT_ROW. */
		if (__wt_row_leaf_value(page, rip, &cursor->value))
			return (0);

		/*
		 * Take the value from the original page cell (which may be
		 * empty).
		 */
		if ((cell =
		    __wt_row_leaf_value_cell(page, rip, NULL)) == NULL) {
			cursor->value.size = 0;
			return (0);
		}
		break;
	WT_ILLEGAL_VALUE(session);
	}

	/* The value is an on-page cell, unpack and expand it as necessary. */
	__wt_cell_unpack(cell, &unpack);
	WT_RET(__wt_page_cell_data_ref(session, page, &unpack, &cursor->value));

	return (0);
}