Пример #1
0
/*
 * __wt_txn_truncate_log --
 *	Begin truncating a range of a file.
 */
int
__wt_txn_truncate_log(
    WT_SESSION_IMPL *session, WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop)
{
	WT_BTREE *btree;
	WT_ITEM *item;
	WT_TXN_OP *op;

	btree = S2BT(session);

	WT_RET(__txn_next_op(session, &op));

	if (btree->type == BTREE_ROW) {
		op->type = WT_TXN_OP_TRUNCATE_ROW;
		op->u.truncate_row.mode = WT_TXN_TRUNC_ALL;
		WT_CLEAR(op->u.truncate_row.start);
		WT_CLEAR(op->u.truncate_row.stop);
		if (start != NULL) {
			op->u.truncate_row.mode = WT_TXN_TRUNC_START;
			item = &op->u.truncate_row.start;
			WT_RET(__wt_cursor_get_raw_key(&start->iface, item));
			WT_RET(__wt_buf_set(
			    session, item, item->data, item->size));
		}
		if (stop != NULL) {
			op->u.truncate_row.mode =
			    (op->u.truncate_row.mode == WT_TXN_TRUNC_ALL) ?
			    WT_TXN_TRUNC_STOP : WT_TXN_TRUNC_BOTH;
			item = &op->u.truncate_row.stop;
			WT_RET(__wt_cursor_get_raw_key(&stop->iface, item));
			WT_RET(__wt_buf_set(
			    session, item, item->data, item->size));
		}
	} else {
		op->type = WT_TXN_OP_TRUNCATE_COL;
		op->u.truncate_col.start =
		    (start == NULL) ? WT_RECNO_OOB : start->recno;
		op->u.truncate_col.stop =
		    (stop == NULL) ? WT_RECNO_OOB : stop->recno;
	}

	/* Write that operation into the in-memory log. */
	WT_RET(__wt_txn_log_op(session, NULL));

	WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOGGING_INMEM));
	F_SET(session, WT_SESSION_LOGGING_INMEM);
	return (0);
}
Пример #2
0
/*
 * __wt_apply_single_idx --
 *	Apply an operation to a single index of a table.
 *
 *	Without a custom extractor, the index key is built from the table's
 *	column group cursors and f is called on the index cursor directly. With
 *	an extractor, a temporary extractor cursor is handed to the extractor's
 *	extract callback; that cursor's insert method is __curextract_insert.
 *	Returns 0 on success or the first error encountered.
 */
int
__wt_apply_single_idx(WT_SESSION_IMPL *session, WT_INDEX *idx,
    WT_CURSOR *cur, WT_CURSOR_TABLE *ctable, int (*f)(WT_CURSOR *))
{
	WT_CURSOR_STATIC_INIT(iface,
	    __wt_cursor_get_key,	/* get-key */
	    __wt_cursor_get_value,	/* get-value */
	    __wt_cursor_set_key,	/* set-key */
	    __wt_cursor_set_value,	/* set-value */
	    __wt_cursor_notsup,		/* compare */
	    __wt_cursor_notsup,		/* equals */
	    __wt_cursor_notsup,		/* next */
	    __wt_cursor_notsup,		/* prev */
	    __wt_cursor_notsup,		/* reset */
	    __wt_cursor_notsup,		/* search */
	    __wt_cursor_notsup,		/* search-near */
	    __curextract_insert,	/* insert */
	    __wt_cursor_notsup,		/* update */
	    __wt_cursor_notsup,		/* reconfigure */
	    __wt_cursor_notsup,		/* remove */
	    __wt_cursor_notsup);	/* close */
	WT_CURSOR_EXTRACTOR ecursor;
	WT_DECL_RET;
	WT_ITEM tkey, tvalue;

	/* Simple case: no extractor, project the key and run the operation. */
	if (idx->extractor == NULL) {
		WT_RET(__wt_schema_project_merge(session,
		    ctable->cg_cursors,
		    idx->key_plan, idx->key_format, &cur->key));
		/*
		 * The index key is now set and the value is empty
		 * (it starts clear and is never set).
		 */
		F_SET(cur, WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT);
		return (f(cur));
	}

	/* Set up the temporary cursor the extractor will insert into. */
	ecursor.iface = iface;
	ecursor.iface.session = &session->iface;
	ecursor.iface.key_format = idx->exkey_format;
	ecursor.ctable = ctable;
	ecursor.idxc = cur;
	ecursor.f = f;

	/* Hand the table cursor's raw key and value to the extractor. */
	WT_RET(__wt_cursor_get_raw_key(&ctable->iface, &tkey));
	WT_RET(__wt_cursor_get_raw_value(&ctable->iface, &tvalue));
	ret = idx->extractor->extract(idx->extractor,
	    &session->iface, &tkey, &tvalue, &ecursor.iface);

	/* Release the temporary cursor's key buffer whatever the outcome. */
	__wt_buf_free(session, &ecursor.iface.key);
	return (ret);
}
Пример #3
0
/*
 * __curextract_insert --
 *	Handle a key produced by a custom extractor: build the index cursor's
 *	key from the extracted key followed by the primary key, then call the
 *	saved cursor function on the index cursor.
 *
 *	The cursor argument is the extractor cursor. Returns the result of the
 *	saved function, or an earlier error.
 */
static int
__curextract_insert(WT_CURSOR *cursor)
{
	WT_CURSOR *idxc;
	WT_CURSOR_EXTRACTOR *cextract;
	WT_ITEM *idx_key, ikey, pkey;
	WT_SESSION_IMPL *session;
	uint8_t *dest;

	session = (WT_SESSION_IMPL *)cursor->session;
	cextract = (WT_CURSOR_EXTRACTOR *)cursor;

	WT_ITEM_SET(ikey, cursor->key);
	/*
	 * A padding byte was appended to the key so the last column didn't
	 * have to be rewritten; drop it before using the key.
	 */
	WT_ASSERT(session, ikey.size > 0);
	--ikey.size;
	WT_RET(__wt_cursor_get_raw_key(cextract->ctable->cg_cursors[0], &pkey));

	/*
	 * The index key is already in the format we need and every primary
	 * key column is required, so the result is a plain concatenation.
	 */
	idxc = cextract->idxc;
	idx_key = &idxc->key;
	WT_RET(__wt_buf_grow(session, idx_key, ikey.size + pkey.size));
	dest = (uint8_t *)idx_key->mem;
	memcpy(dest, ikey.data, ikey.size);
	memcpy(dest + ikey.size, pkey.data, pkey.size);
	idx_key->size = ikey.size + pkey.size;

	/*
	 * The index key is now set and the value is empty (it starts clear and
	 * is never set).
	 */
	F_SET(idxc, WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT);

	/* Call the underlying cursor function to update the index. */
	return (cextract->f(idxc));
}
Пример #4
0
/*
 * __wt_las_sweep --
 *	Sweep the lookaside table.
 *
 *	Each call reviews a bounded number of lookaside records, removing the
 *	ones whose transaction ID is visible to all, and remembers the key of
 *	the last record reviewed in the connection (conn->las_sweep_key) so the
 *	next call can resume near the same spot. The connection's record count
 *	is reduced by the number of records removed.
 *
 *	Returns the final value of ret; WT_NOTFOUND from the cursor walk is
 *	passed through WT_ERR_NOTFOUND_OK (presumably mapping it to success --
 *	confirm against the macro definition).
 */
int
__wt_las_sweep(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;
	WT_CURSOR *cursor;
	WT_DECL_ITEM(las_addr);
	WT_DECL_ITEM(las_key);
	WT_DECL_RET;
	WT_ITEM *key;
	uint64_t cnt, las_counter, las_txnid;
	int64_t remove_cnt;
	uint32_t las_id, session_flags;
	int notused;

	conn = S2C(session);
	cursor = NULL;
	/* The resume position lives in the connection, not on the stack. */
	key = &conn->las_sweep_key;
	remove_cnt = 0;
	session_flags = 0;		/* [-Werror=maybe-uninitialized] */

	/* Scratch buffers that receive the key fields unpacked in the loop. */
	WT_ERR(__wt_scr_alloc(session, 0, &las_addr));
	WT_ERR(__wt_scr_alloc(session, 0, &las_key));

	WT_ERR(__wt_las_cursor(session, &cursor, &session_flags));

	/*
	 * If we're not starting a new sweep, position the cursor using the key
	 * from the last call (we don't care if we're before or after the key,
	 * just roughly in the same spot is fine).
	 */
	if (key->size != 0) {
		__wt_cursor_set_raw_key(cursor, key);
		ret = cursor->search_near(cursor, &notused);

		/*
		 * Don't search for the same key twice; if we don't set a new
		 * key below, it's because we've reached the end of the table
		 * and we want the next pass to start at the beginning of the
		 * table. Searching for the same key could leave us stuck at
		 * the end of the table, repeatedly checking the same rows.
		 */
		key->size = 0;
		/* Any search failure skips the walk entirely. */
		if (ret != 0)
			goto srch_notfound;
	}

	/*
	 * The sweep server wakes up every 10 seconds (by default), it's a slow
	 * moving thread. Try to review the entire lookaside table once every 5
	 * minutes, or every 30 calls.
	 *
	 * The reason is because the lookaside table exists because we're seeing
	 * cache/eviction pressure (it allows us to trade performance and disk
	 * space for cache space), and it's likely lookaside blocks are being
	 * evicted, and reading them back in doesn't help things. A trickier,
	 * but possibly better, alternative might be to review all lookaside
	 * blocks in the cache in order to get rid of them, and slowly review
	 * lookaside blocks that have already been evicted.
	 */
	/* Review 1/30th of the recorded rows per call, but at least 100. */
	cnt = (uint64_t)WT_MAX(100, conn->las_record_cnt / 30);

	/* Discard pages we read as soon as we're done with them. */
	F_SET(session, WT_SESSION_NO_CACHE);

	/* Walk the file. */
	for (; cnt > 0 && (ret = cursor->next(cursor)) == 0; --cnt) {
		/*
		 * If the loop terminates after completing a work unit, we will
		 * continue the table sweep next time. Get a local copy of the
		 * sweep key, we're going to reset the cursor; do so before
		 * calling cursor.remove, cursor.remove can discard our hazard
		 * pointer and the page could be evicted from underneath us.
		 */
		if (cnt == 1) {
			WT_ERR(__wt_cursor_get_raw_key(cursor, key));
			/*
			 * Only call __wt_buf_set when the key's data is not
			 * already inside the item's own buffer.
			 */
			if (!WT_DATA_IN_ITEM(key))
				WT_ERR(__wt_buf_set(
				    session, key, key->data, key->size));
		}

		/* Unpack the key; only las_txnid is consulted below. */
		WT_ERR(cursor->get_key(cursor,
		    &las_id, las_addr, &las_counter, &las_txnid, las_key));

		/*
		 * If the on-page record transaction ID associated with the
		 * record is globally visible, the record can be discarded.
		 *
		 * Cursor opened overwrite=true: won't return WT_NOTFOUND should
		 * another thread remove the record before we do, and the cursor
		 * remains positioned in that case.
		 */
		if (__wt_txn_visible_all(session, las_txnid)) {
			WT_ERR(cursor->remove(cursor));
			++remove_cnt;
		}
	}

	/*
	 * Reached both by falling out of the loop and by the jump taken when
	 * the initial search_near fails.
	 */
srch_notfound:
	WT_ERR_NOTFOUND_OK(ret);

	/*
	 * The body of this block runs only when control jumps to the err
	 * label; normal fall-through skips it. On error, discard the saved
	 * sweep key.
	 */
	if (0) {
err:		__wt_buf_free(session, key);
	}

	/* Close the cursor on every path, preserving any earlier error. */
	WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags));

	/*
	 * If there were races to remove records, we can over-count.  All
	 * arithmetic is signed, so underflow isn't fatal, but check anyway so
	 * we don't skew low over time.
	 *
	 * S2C(session) here is the same connection as conn, assigned above.
	 */
	if (remove_cnt > S2C(session)->las_record_cnt)
		S2C(session)->las_record_cnt = 0;
	else if (remove_cnt > 0)
		(void)__wt_atomic_subi64(&conn->las_record_cnt, remove_cnt);

	/*
	 * Cleared unconditionally, including on paths that never set it
	 * (early error or failed search).
	 */
	F_CLR(session, WT_SESSION_NO_CACHE);

	__wt_scr_free(session, &las_addr);
	__wt_scr_free(session, &las_key);

	return (ret);
}
Пример #5
0
/*
 * __apply_idx --
 *	Apply an operation to all indices of a table.
 *
 *	func_off is the byte offset, within a cursor, of the method pointer to
 *	call on each index cursor. When skip_immutable is non-zero, indices
 *	flagged WT_INDEX_IMMUTABLE are left alone. Each index cursor is reset
 *	after the operation. Returns 0 on success or the first error.
 */
static int
__apply_idx(WT_CURSOR_TABLE *ctable, size_t func_off, int skip_immutable)
{
	WT_CURSOR_STATIC_INIT(iface,
	    __wt_cursor_get_key,		/* get-key */
	    __wt_cursor_get_value,		/* get-value */
	    __wt_cursor_set_key,		/* set-key */
	    __wt_cursor_set_value,		/* set-value */
	    __wt_cursor_notsup,			/* compare */
	    __wt_cursor_notsup,			/* next */
	    __wt_cursor_notsup,			/* prev */
	    __wt_cursor_notsup,			/* reset */
	    __wt_cursor_notsup,			/* search */
	    __wt_cursor_notsup,			/* search-near */
	    __curextract_insert,		/* insert */
	    __wt_cursor_notsup,			/* update */
	    __wt_cursor_notsup,			/* remove */
	    __wt_cursor_notsup);		/* close */
	WT_CURSOR **cursors, *idxc;
	WT_CURSOR_EXTRACTOR ecursor;
	WT_DECL_RET;
	WT_INDEX *idx;
	WT_ITEM tkey, tvalue;
	WT_SESSION_IMPL *session;
	int (*f)(WT_CURSOR *);
	u_int i;

	session = (WT_SESSION_IMPL *)ctable->iface.session;
	cursors = ctable->idx_cursors;

	for (i = 0; i < ctable->table->nindices; i++) {
		idx = ctable->table->indices[i];
		if (skip_immutable && F_ISSET(idx, WT_INDEX_IMMUTABLE))
			continue;

		/* Look up the requested method on this index's cursor. */
		idxc = cursors[i];
		f = *(int (**)(WT_CURSOR *))((uint8_t *)idxc + func_off);

		if (idx->extractor == NULL) {
			WT_RET(__wt_schema_project_merge(session,
			    ctable->cg_cursors,
			    idx->key_plan, idx->key_format, &idxc->key));
			/*
			 * The index key is now set and the value is empty
			 * (it starts clear and is never set).
			 */
			F_SET(idxc, WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT);
			WT_RET(f(idxc));
		} else {
			/*
			 * Custom extractor: give it a temporary cursor whose
			 * insert method is __curextract_insert.
			 */
			ecursor.iface = iface;
			ecursor.iface.session = &session->iface;
			ecursor.iface.key_format = idx->exkey_format;
			ecursor.ctable = ctable;
			ecursor.idxc = idxc;
			ecursor.f = f;

			WT_RET(__wt_cursor_get_raw_key(&ctable->iface, &tkey));
			WT_RET(__wt_cursor_get_raw_value(
			    &ctable->iface, &tvalue));
			ret = idx->extractor->extract(idx->extractor,
			    &session->iface, &tkey, &tvalue, &ecursor.iface);

			/* Free the temporary key before checking the result. */
			__wt_buf_free(session, &ecursor.iface.key);
			WT_RET(ret);
		}
		WT_RET(idxc->reset(idxc));
	}

	return (0);
}
Пример #6
0
/*
 * __wt_las_sweep --
 *	Sweep the lookaside table.
 *
 *	Each call reviews up to conn->las_sweep_cnt lookaside records (never
 *	fewer than WT_SWEEP_LOOKASIDE_MIN_CNT), removing the ones whose
 *	transaction ID is visible to all, and remembers the key of the last
 *	record reviewed in conn->las_sweep_key so the next call can resume near
 *	it. conn->las_sweep_call counts the calls made in the current pass and
 *	is used to tune the per-call row count.
 *
 *	Returns the final value of ret; WT_NOTFOUND from the cursor walk is
 *	passed through WT_ERR_NOTFOUND_OK (presumably mapping it to success --
 *	confirm against the macro definition).
 */
int
__wt_las_sweep(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;
	WT_CURSOR *cursor;
	WT_DECL_ITEM(las_addr);
	WT_DECL_ITEM(las_key);
	WT_DECL_RET;
	WT_ITEM *key;
	uint64_t cnt, las_counter, las_txnid;
	uint32_t las_id, session_flags;
	int notused;

	conn = S2C(session);
	cursor = NULL;
	/* The resume position lives in the connection, not on the stack. */
	key = &conn->las_sweep_key;
	session_flags = 0;		/* [-Werror=maybe-uninitialized] */

	/* Scratch buffers that receive the key fields unpacked in the loop. */
	WT_ERR(__wt_scr_alloc(session, 0, &las_addr));
	WT_ERR(__wt_scr_alloc(session, 0, &las_key));

	WT_ERR(__wt_las_cursor(session, &cursor, &session_flags));

	/*
	 * If we're not starting a new sweep, position the cursor using the key
	 * from the last call (we don't care if we're before or after the key,
	 * just roughly in the same spot is fine).
	 */
	if (conn->las_sweep_call != 0 && key->data != NULL) {
		__wt_cursor_set_raw_key(cursor, key);
		/* Any search failure skips the walk and the row-count tuning. */
		if ((ret = cursor->search_near(cursor, &notused)) != 0)
			goto srch_notfound;
	}

	/*
	 * The sweep server wakes up every 10 seconds (by default), it's a slow
	 * moving thread. Try to review the entire lookaside table once every 5
	 * minutes, or every 30 calls.
	 *
	 * The reason is because the lookaside table exists because we're seeing
	 * cache/eviction pressure (it allows us to trade performance and disk
	 * space for cache space), and it's likely lookaside blocks are being
	 * evicted, and reading them back in doesn't help things. A trickier,
	 * but possibly better, alternative might be to review all lookaside
	 * blocks in the cache in order to get rid of them, and slowly review
	 * lookaside blocks that have already been evicted.
	 *
	 * We can't know for sure how many records are in the lookaside table,
	 * the cursor insert and remove statistics aren't updated atomically.
	 * Start with reviewing 100 rows, and if it takes more than the target
	 * number of calls to finish, increase the number of rows checked on
	 * each call; if it takes less than the target calls to finish, then
	 * decrease the number of rows reviewed on each call (but never less
	 * than 100).
	 */
#define	WT_SWEEP_LOOKASIDE_MIN_CNT	100
#define	WT_SWEEP_LOOKASIDE_PASS_TARGET	 30
	/* Count this call, and raise the stored row count to the minimum. */
	++conn->las_sweep_call;
	if ((cnt = conn->las_sweep_cnt) < WT_SWEEP_LOOKASIDE_MIN_CNT)
		cnt = conn->las_sweep_cnt = WT_SWEEP_LOOKASIDE_MIN_CNT;

	/* Walk the file. */
	for (; cnt > 0 && (ret = cursor->next(cursor)) == 0; --cnt) {
		/*
		 * If the loop terminates after completing a work unit, we will
		 * continue the table sweep next time. Get a local copy of the
		 * sweep key, we're going to reset the cursor; do so before
		 * calling cursor.remove, cursor.remove can discard our hazard
		 * pointer and the page could be evicted from underneath us.
		 */
		if (cnt == 1) {
			WT_ERR(__wt_cursor_get_raw_key(cursor, key));
			/*
			 * Only call __wt_buf_set when the key's data is not
			 * already inside the item's own buffer.
			 */
			if (!WT_DATA_IN_ITEM(key))
				WT_ERR(__wt_buf_set(
				    session, key, key->data, key->size));
		}

		/* Unpack the key; only las_txnid is consulted below. */
		WT_ERR(cursor->get_key(cursor,
		    &las_id, las_addr, &las_counter, &las_txnid, las_key));

		/*
		 * If the on-page record transaction ID associated with the
		 * record is globally visible, the record can be discarded.
		 *
		 * Cursor opened overwrite=true: won't return WT_NOTFOUND should
		 * another thread remove the record before we do, and the cursor
		 * remains positioned in that case.
		 */
		if (__wt_txn_visible_all(session, las_txnid))
			WT_ERR(cursor->remove(cursor));
	}

	/*
	 * When reaching the lookaside table end or the target number of calls,
	 * adjust the row count. Decrease/increase the row count depending on
	 * if the number of calls is less/more than the target.
	 *
	 * A pass that ends on exactly the target number of calls leaves the
	 * row count unchanged.
	 */
	if (ret == WT_NOTFOUND ||
	    conn->las_sweep_call > WT_SWEEP_LOOKASIDE_PASS_TARGET) {
		if (conn->las_sweep_call < WT_SWEEP_LOOKASIDE_PASS_TARGET &&
		    conn->las_sweep_cnt > WT_SWEEP_LOOKASIDE_MIN_CNT)
			conn->las_sweep_cnt -= WT_SWEEP_LOOKASIDE_MIN_CNT;
		if (conn->las_sweep_call > WT_SWEEP_LOOKASIDE_PASS_TARGET)
			conn->las_sweep_cnt += WT_SWEEP_LOOKASIDE_MIN_CNT;
	}

	/*
	 * Reached both by falling out of the loop and by the jump taken when
	 * the initial search_near fails. WT_NOTFOUND ends the pass: a zero
	 * call count makes the next call start without positioning.
	 */
srch_notfound:
	if (ret == WT_NOTFOUND)
		conn->las_sweep_call = 0;

	WT_ERR_NOTFOUND_OK(ret);

	/*
	 * The body of this block runs only when control jumps to the err
	 * label; normal fall-through skips it. On error, discard the saved
	 * sweep key.
	 */
	if (0) {
err:		__wt_buf_free(session, key);
	}

	/* Close the cursor on every path, preserving any earlier error. */
	WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags));

	__wt_scr_free(session, &las_addr);
	__wt_scr_free(session, &las_key);

	return (ret);
}
Пример #7
0
/*
 * __curdump_get_key --
 *	WT_CURSOR->get_key for dump cursors.
 */
static int
__curdump_get_key(WT_CURSOR *cursor, ...)
{
	WT_CURSOR *child;
	WT_CURSOR_DUMP *cdump;
	WT_CURSOR_JSON *json;
	WT_DECL_RET;
	WT_ITEM item, *itemp;
	WT_SESSION_IMPL *session;
	size_t size;
	uint64_t recno;
	const char *fmt;
	const void *buffer;
	va_list ap;

	cdump = (WT_CURSOR_DUMP *)cursor;
	child = cdump->child;

	va_start(ap, cursor);
	CURSOR_API_CALL(cursor, session, get_key, NULL);

	if (F_ISSET(cursor, WT_CURSTD_DUMP_JSON)) {
		json = (WT_CURSOR_JSON *)cursor->json_private;
		WT_ASSERT(session, json != NULL);
		if (WT_CURSOR_RECNO(cursor)) {
			WT_ERR(child->get_key(child, &recno));
			buffer = &recno;
			size = sizeof(recno);
			fmt = "R";
		} else {
			WT_ERR(__wt_cursor_get_raw_key(child, &item));
			buffer = item.data;
			size = item.size;
			if (F_ISSET(cursor, WT_CURSTD_RAW))
				fmt = "u";
			else
				fmt = cursor->key_format;
		}
		ret = __wt_json_alloc_unpack(
		    session, buffer, size, fmt, json, true, ap);
	} else {
		if (WT_CURSOR_RECNO(cursor) &&
		    !F_ISSET(cursor, WT_CURSTD_RAW)) {
			WT_ERR(child->get_key(child, &recno));

			WT_ERR(__wt_buf_fmt(session, &cursor->key, "%"
			    PRIu64, recno));
		} else {
			WT_ERR(child->get_key(child, &item));

			WT_ERR(__raw_to_dump(session, &item, &cursor->key,
			    F_ISSET(cursor, WT_CURSTD_DUMP_HEX)));
		}

		if (F_ISSET(cursor, WT_CURSTD_RAW)) {
			itemp = va_arg(ap, WT_ITEM *);
			itemp->data = cursor->key.data;
			itemp->size = cursor->key.size;
		} else
			*va_arg(ap, const char **) = cursor->key.data;
	}