Beispiel #1
0
/* {{{ rberkeley_db_compact */
/*
 * R-callable wrapper around DB->compact().
 *
 *   _dbp     external pointer tagged RBerkeley_DB that holds the DB handle
 *   _txnid   external pointer holding a DB_TXN, or R NULL for no transaction
 *   _start   raw vector used as the start key, or R NULL to pass NULL
 *   _stop    raw vector used as the stop key, or R NULL to pass NULL
 *   _c_data  currently ignored; no DB_COMPACT structure is handed to compact()
 *   _flags   integer vector whose first element is passed as the flags argument
 *
 * Returns the DB->compact() return code as an R integer scalar. Raises an R
 * error when _dbp does not carry a valid DB handle.
 */
SEXP rberkeley_db_compact (SEXP _dbp, SEXP _txnid, SEXP _start,
                           SEXP _stop, SEXP _c_data, SEXP _flags)
{
  DB *dbp;
  DB_TXN *txnid;
  DBT start, stop;
  DBT *startp = NULL, *stopp = NULL;
  /*DB_COMPACT c_data;*/
  u_int32_t flags;
  int ret;

  /* Only dereference the transaction pointer when one was actually given. */
  if(!isNull(_txnid)) {
    txnid = R_ExternalPtrAddr(_txnid);
  } else {
    txnid = NULL;
  }

  /*
   * The key DBTs are only populated when the caller supplied a raw vector;
   * otherwise NULL is passed so compact() never sees an uninitialised DBT.
   */
  if(!isNull(_start)) {
    memset(&start, 0, sizeof(DBT));
    start.data = (unsigned char *)RAW(_start);
    start.size = length(_start);
    startp = &start;
  }
  if(!isNull(_stop)) {
    memset(&stop, 0, sizeof(DBT));
    stop.data = (unsigned char *)RAW(_stop);
    stop.size = length(_stop);
    stopp = &stop;
  }
  flags = (u_int32_t)INTEGER(_flags)[0];

  dbp = R_ExternalPtrAddr(_dbp);
  if(R_ExternalPtrTag(_dbp) != RBerkeley_DB || dbp == NULL)
    error("invalid 'db' handle");

  /*
   * The "end" key is not reported back to R, so NULL is passed for it rather
   * than an uninitialised DBT whose returned buffer would never be released.
   */
  ret = dbp->compact(dbp, txnid, startp, stopp, NULL, flags, NULL);

  return ScalarInteger(ret);
}
Beispiel #2
0
// Compacts every database in m_dbs and logs disk-usage and timing statistics.
//
// Outline of what the code below does:
//   1. Checkpoints the environment and measures how many filesystem blocks that freed.
//   2. While holding m_dbMutex, for each database:
//        - if compactStepSize() >= 1, steps through the database with a cursor in
//          windows of stepSize records, calling DB->compact(DB_FREE_SPACE) on each
//          window and checkpointing after every window;
//        - if no step size is configured, or the chunked pass ended with an error
//          other than DB_LOCK_DEADLOCK, performs one whole-database DB->compact().
//   3. Logs the accumulated totals and the final volume usage.
//
// Returns MojErrNone when everything succeeded. Checkpoint and Berkeley DB errors are
// handed to MojErrCheck / MojBdbErrCheck (presumably returning early with an error).
MojErr MojDbBerkeleyEngine::compact()
{
	const char * DatabaseRoot = "/var/db"; // FIXME: Should not be hard-coded, but so is the disk space monitor!

	// Note: the per-database sections below declare their own statAfterCompact,
	// which shadows this one; this outer copy only describes the state right
	// after the initial checkpoint.
	struct statvfs statAtBeginning, statAfterCompact, statAtEnd;

	MojLogTrace(MojDbBerkeleyEngine::s_log);

	struct timeval totalStartTime = {0,0}, totalStopTime = {0,0};

	gettimeofday(&totalStartTime, NULL);

	memset(&statAtBeginning, '\0', sizeof(statAtBeginning));
	::statvfs(DatabaseRoot, &statAtBeginning);

	const int blockSize = (int)statAtBeginning.f_bsize;

	// checkpoint before compact
	MojErr err = m_env->checkpoint(0);
	MojErrCheck(err);

	memset(&statAfterCompact, '\0', sizeof(statAfterCompact));
	::statvfs(DatabaseRoot, &statAfterCompact);

	int pre_compact_reclaimed_blocks = (int)(statAfterCompact.f_bfree - statAtBeginning.f_bfree);

	MojLogDebug(s_log, _T("Starting compact: Checkpoint freed %d bytes. Volume %s has %lu bytes free out of %lu bytes (%.1f full)\n"),
		pre_compact_reclaimed_blocks * blockSize,
		DatabaseRoot, statAfterCompact.f_bfree * blockSize,
		 statAfterCompact.f_blocks * blockSize,
		 (float)(statAfterCompact.f_blocks - statAfterCompact.f_bfree) * 100.0 / (float)statAfterCompact.f_blocks);


	// Retrieve setting for record count used to break up compact operations
	const int stepSize = m_env->compactStepSize();

	// Re-baseline so the final "freed" figure excludes the pre-compact checkpoint.
	memset(&statAtBeginning, '\0', sizeof(statAtBeginning));
	::statvfs(DatabaseRoot, &statAtBeginning);

	int total_pages_examined = 0, total_pages_freed = 0, total_pages_truncated = 0;
	int max_pages_examined = 0, max_pages_freed = 0, max_pages_truncated = 0;
	int total_log_generation_blocks = 0, total_reclaimed_blocks = 0;
	int max_log_generation_blocks = 0, max_reclaimed_blocks = 0;

	int total_compact_time = 0, total_step_time = 0;
	int max_compact_time = 0, max_step_time = 0;

	int total_key_total = 0, total_value_total = 0;
	int max_key_total = 0, max_value_total = 0;

	MojThreadGuard guard(m_dbMutex);
	// call compact on each database
	for (DatabaseVec::ConstIterator i = m_dbs.begin(); i != m_dbs.end(); ++i) {
		DB* db = (*i)->impl();
		DB_COMPACT c_data;
		MojZero(&c_data, sizeof(c_data));

		DBC * dbc = NULL;
		int dbErr;
		DBT key1, key2;
		DBT value;

		// DB_DBT_REALLOC: Berkeley DB (re)allocates .data as needed; we free() it.
		memset(&key1, '\0', sizeof(key1));
		memset(&key2, '\0', sizeof(key2));
		memset(&value, '\0', sizeof(value));
		key1.flags = DB_DBT_REALLOC;
		key2.flags = DB_DBT_REALLOC;
		value.flags = DB_DBT_REALLOC;

		int key1_count = 0, key2_count = 0;

		dbErr = 0;

		// Continue compacting the database by chunks until we run into an error. If a stepSize
		// isn't configured, don't chunk it at all.
		while ((stepSize >= 1) && (dbErr == 0)) {

			// Construct key to step forward by a set number of records, to select the compact window.
			// We close the cursor after we've found the next key, so it won't keep a lock open that
			// could disrupt the compaction. Without locking, we might miss an insertion or deletion
			// happening between compactions, but that is presumably acceptable for compaction purposes.

			int key_total = 0, value_total = 0; // Tracked only for debugging purposes.

			dbErr = db->cursor(db, NULL, &dbc, 0);

			if (dbErr == 0) {

				if (key1.data == NULL) {
					// Move the cursor to the beginning of the database
					dbErr = dbc->get(dbc, &key1, &value, DB_FIRST);

					key_total += key1.size;
					value_total += value.size;

					// discard key1, we don't want the key for the beginning
					if (key1.data)
						free(key1.data);

					key1.data = NULL;
					key1.size = 0;

				} else {
					// move the cursor to the location of the prior key.
					// If that exact key is missing, this should choose the
					// next one.
					dbErr = dbc->get(dbc, &key1, &value, DB_SET_RANGE);
				}

				int elapsedStepTimeMS = 0;

				if (dbErr == DB_NOTFOUND) {
					// If we didn't find a first key, the DB is presumably empty,
					// and we shouldn't search for the end key.

					dbErr = 0;

					if (key1.data)
						free(key1.data);
					key1.data = NULL;
					key1.size = 0;

					if (key2.data)
						free(key2.data);
					key2.data = NULL;
					key2.size = 0;

				} else if (dbErr == 0) {

					int count;
					// Move the cursor forward by the chosen stepSize.
					// May exit early with error DB_NOTFOUND, indicating end of database.

					struct timeval startTime = {0,0}, stopTime = {0,0};

					gettimeofday(&startTime, NULL);

					for (count = 0; (dbErr == 0) && (count < stepSize); count++) {
						dbErr = dbc->get(dbc, &key2, &value, DB_NEXT);

						key_total += key2.size;
						value_total += value.size;
					}

					key2_count = key1_count + count;

					if (dbErr == DB_NOTFOUND) {
						// End of database: a NULL stop key means "compact to the end".
						dbErr = 0;

						if (key2.data)
							free(key2.data);
						key2.data = NULL;
						key2.size = 0;
					}

					gettimeofday(&stopTime, NULL);

					elapsedStepTimeMS = (int)(stopTime.tv_sec - startTime.tv_sec) * 1000 +
							  (int)(stopTime.tv_usec - startTime.tv_usec) / 1000;
				}

				dbc->close(dbc);

				if (dbErr != 0)
					break;

				// Compact from key1 to key2. (The documentation says it starts at 'the
				// smallest key greater than or equal to the specified key', and ends at
				// 'the page with the smallest key greater than the specified key'. I don't
				// know exactly what that means regarding inclusivity, so this procedure may
				// not be fully compacting the pages which contain the keys.)


				MojLogDebug(s_log, _T("Compacting %s (partial from ~record %d to %d). Stepped over %d/%d bytes of keys/values in %dms.\n"), (*i)->m_name.data(),
					key1_count, key2_count,
					key_total, value_total,
					elapsedStepTimeMS);

				struct statvfs statBeforeCompact, statAfterCompact, statAfterCheckpoint;

				memset(&statBeforeCompact, '\0', sizeof(statBeforeCompact));
				::statvfs(DatabaseRoot, &statBeforeCompact);

				struct timeval startTime = {0,0}, stopTime = {0,0};

				gettimeofday(&startTime, NULL);

				MojZero(&c_data, sizeof(c_data));
				dbErr = db->compact(db, NULL, key1.data ? &key1 : NULL, key2.data ? &key2 : NULL, &c_data, DB_FREE_SPACE, NULL);

				gettimeofday(&stopTime, NULL);

				int elapsedCompactTimeMS = (int)(stopTime.tv_sec - startTime.tv_sec) * 1000 +
						           (int)(stopTime.tv_usec - startTime.tv_usec) / 1000;

				MojLogDebug(s_log, _T("Compact stats of %s (partial from ~record %d to %d): time %dms, compact_deadlock=%d, compact_pages_examine=%d, compact_pages_free=%d, compact_levels=%d, compact_pages_truncated=%d\n"),
					(*i)->m_name.data(),
					key1_count, key2_count,
					elapsedCompactTimeMS,
					c_data.compact_deadlock, c_data.compact_pages_examine,
					c_data.compact_pages_free, c_data.compact_levels, c_data.compact_pages_truncated);

				total_compact_time += elapsedCompactTimeMS;
				if (elapsedCompactTimeMS > max_compact_time)
					max_compact_time = elapsedCompactTimeMS;
				total_step_time += elapsedStepTimeMS;
				if (elapsedStepTimeMS > max_step_time)
					max_step_time = elapsedStepTimeMS;

				total_key_total += key_total;
				if (key_total > max_key_total)
					max_key_total = key_total;
				total_value_total += value_total;
				if (value_total > max_value_total)
					max_value_total = value_total;

				total_pages_examined += c_data.compact_pages_examine;
				if ((int)c_data.compact_pages_examine > max_pages_examined)
					max_pages_examined = c_data.compact_pages_examine;
				total_pages_freed += c_data.compact_pages_free;
				if ((int)c_data.compact_pages_free > max_pages_freed)
					max_pages_freed = c_data.compact_pages_free;
				total_pages_truncated += c_data.compact_pages_truncated;
				if ((int)c_data.compact_pages_truncated > max_pages_truncated)
					max_pages_truncated = c_data.compact_pages_truncated;

				memset(&statAfterCompact, '\0', sizeof(statAfterCompact));
				::statvfs(DatabaseRoot, &statAfterCompact);

				// Free space lost during compact is attributed to log generation.
				int log_generation_blocks = (int)(statBeforeCompact.f_bfree - statAfterCompact.f_bfree);

				total_log_generation_blocks += log_generation_blocks;
				if (log_generation_blocks > max_log_generation_blocks)
					max_log_generation_blocks = log_generation_blocks;

				err = m_env->checkpoint(0);
				if (err != MojErrNone) {
					// MojErrCheck below is expected to leave the function; release the
					// DB_DBT_REALLOC buffers first so they are not leaked.
					free(key1.data);
					key1.data = NULL;
					key1.size = 0;
					free(key2.data);
					key2.data = NULL;
					key2.size = 0;
					free(value.data);
					value.data = NULL;
					value.size = 0;
				}
				MojErrCheck(err);

				memset(&statAfterCheckpoint, '\0', sizeof(statAfterCheckpoint));
				::statvfs(DatabaseRoot, &statAfterCheckpoint);

				int reclaimed_blocks = (int)(statAfterCheckpoint.f_bfree - statBeforeCompact.f_bfree);

				total_reclaimed_blocks += reclaimed_blocks;
				if (reclaimed_blocks > max_reclaimed_blocks)
					max_reclaimed_blocks = reclaimed_blocks;

				MojLogDebug(s_log, _T("Compact of %s (partial from ~record %d to %d) generated %d bytes of log data, ultimately reclaiming %d bytes after checkpoint.\n"),
					(*i)->m_name.data(),
					key1_count, key2_count,
					log_generation_blocks * blockSize,
					reclaimed_blocks * blockSize);

				// copy key2 over key1 (ownership of the buffer moves to key1)
				if (key1.data)
					free(key1.data);
				key1.data = key2.data;
				key1.size = key2.size;
				key2.data = NULL;
				key2.size = 0;
				key1_count = key2_count;

				// if key2 was empty, then we are done.
				if (key1.data == NULL)
					break;

			}


		}

		if (key1.data)
			free(key1.data);
		if (key2.data)
			free(key2.data);
		if (value.data)
			free(value.data);


		// If no step size was configured, fall back and do a complete compact. Do the same
		// if there was an error performing the chunked compaction. The complete compact risks
		// running out of disk space, but that's preferable to not compacting at all, which will
		// also likely eventually lead to running out of space.

		if (dbErr == DB_LOCK_DEADLOCK) {
			// But for deadlock, we should just give up, as this might
			// happen in normal use.
			MojBdbErrCheck(dbErr, _T("cursor and compact deadlocked"));
		}

		// stepSize == 1 is a valid chunked configuration (the loop above ran), so only
		// stepSize < 1 counts as "not configured".
		if ((stepSize < 1) || (dbErr != 0)) {
			MojLogDebug(s_log, "Compacting %s\n", (*i)->m_name.data());

			struct statvfs statBeforeCompact, statAfterCompact, statAfterCheckpoint;

			memset(&statBeforeCompact, '\0', sizeof(statBeforeCompact));
			::statvfs(DatabaseRoot, &statBeforeCompact);

			struct timeval startTime = {0,0}, stopTime = {0,0};

			gettimeofday(&startTime, NULL);

			MojZero(&c_data, sizeof(c_data));
			dbErr = db->compact(db, NULL, NULL, NULL, &c_data, DB_FREE_SPACE, NULL);

			gettimeofday(&stopTime, NULL);

			int elapsedCompactTimeMS = (int)(stopTime.tv_sec - startTime.tv_sec) * 1000 +
					           (int)(stopTime.tv_usec - startTime.tv_usec) / 1000;

			total_compact_time += elapsedCompactTimeMS;
			if (elapsedCompactTimeMS > max_compact_time)
				max_compact_time = elapsedCompactTimeMS;

			MojLogDebug(s_log, "Compact stats of %s: time %dms, compact_deadlock=%d, compact_pages_examine=%d, compact_pages_free=%d, compact_levels=%d, compact_pages_truncated=%d\n",
				(*i)->m_name.data(),
				elapsedCompactTimeMS,
				c_data.compact_deadlock, c_data.compact_pages_examine,
				c_data.compact_pages_free, c_data.compact_levels, c_data.compact_pages_truncated);

			total_pages_examined += c_data.compact_pages_examine;
			if ((int)c_data.compact_pages_examine > max_pages_examined)
				max_pages_examined = c_data.compact_pages_examine;
			total_pages_freed += c_data.compact_pages_free;
			if ((int)c_data.compact_pages_free > max_pages_freed)
				max_pages_freed = c_data.compact_pages_free;
			total_pages_truncated += c_data.compact_pages_truncated;
			if ((int)c_data.compact_pages_truncated > max_pages_truncated)
				max_pages_truncated = c_data.compact_pages_truncated;

			memset(&statAfterCompact, '\0', sizeof(statAfterCompact));
			::statvfs(DatabaseRoot, &statAfterCompact);

			int log_generation_blocks = (int)(statBeforeCompact.f_bfree - statAfterCompact.f_bfree);

			total_log_generation_blocks += log_generation_blocks;
			if (log_generation_blocks > max_log_generation_blocks)
				max_log_generation_blocks = log_generation_blocks;

			err = m_env->checkpoint(0);
			MojErrCheck(err);

			memset(&statAfterCheckpoint, '\0', sizeof(statAfterCheckpoint));
			::statvfs(DatabaseRoot, &statAfterCheckpoint);

			int reclaimed_blocks = (int)(statAfterCheckpoint.f_bfree - statBeforeCompact.f_bfree);

			total_reclaimed_blocks += reclaimed_blocks;
			if (reclaimed_blocks > max_reclaimed_blocks)
				max_reclaimed_blocks = reclaimed_blocks;

			MojLogDebug(s_log, "Compact of %s generated %d bytes of log data, ultimately reclaiming %d bytes after checkpoint.\n",
				(*i)->m_name.data(),
				log_generation_blocks * blockSize,
				reclaimed_blocks * blockSize);
		}
		MojBdbErrCheck(dbErr, _T("db->compact"));

	}
	guard.unlock();


	gettimeofday(&totalStopTime, NULL);

	int elapsedTotalMS = (int)(totalStopTime.tv_sec - totalStartTime.tv_sec) * 1000 +
		             (int)(totalStopTime.tv_usec - totalStartTime.tv_usec) / 1000;

	memset(&statAtEnd, '\0', sizeof(statAtEnd));
	::statvfs(DatabaseRoot, &statAtEnd);

	int compact_freed_blocks = (int)(statAtEnd.f_bfree - statAtBeginning.f_bfree);

	MojLogDebug(s_log, _T("During compact: %d db pages examined (max burst %d), %d db pages freed (max burst %d), "
			     "%d db pages truncated (max burst %d), "
	                     "%d log bytes created by compacts (max burst %d), "
	                     "%d bytes reclaimed by checkpoints (max burst %d), "
	                     "%d bytes of keys stepped over (max burst %d), "
	                     "%d bytes of values stepped over (max burst %d), "
	                     "%dms spent in stepping (max burst %dms), "
	                     "%dms spent in compact (max burst %dms)\n"),
	                     total_pages_examined, max_pages_examined, total_pages_freed, max_pages_freed,
	                     total_pages_truncated, max_pages_truncated,
	                     total_log_generation_blocks * blockSize, max_log_generation_blocks * blockSize,
	                     total_reclaimed_blocks * blockSize, max_reclaimed_blocks * blockSize,
	                     total_key_total, max_key_total,
	                     total_value_total, max_value_total,
	                     total_step_time, max_step_time,
	                     total_compact_time, max_compact_time
	                     );

	// Report the volume state measured at the very end, not the snapshot taken
	// right after the initial checkpoint.
	MojLogDebug(s_log, _T("Compact complete: took %dms, freed %d bytes (including pre-checkpoint of %d bytes). Volume %s has %lu bytes free out of %lu bytes (%.1f full)\n"),
		elapsedTotalMS,
		compact_freed_blocks * blockSize,
		pre_compact_reclaimed_blocks * blockSize,
		DatabaseRoot,
		statAtEnd.f_bfree * blockSize,
		 statAtEnd.f_blocks * blockSize,
		 (float)(statAtEnd.f_blocks - statAtEnd.f_bfree) * 100.0 / (float)statAtEnd.f_blocks);

	return MojErrNone;
}
Beispiel #3
0
/*
** A write transaction must be opened before calling this function.
** It performs a single unit of work towards an incremental vacuum.
** Specifically, in the Berkeley DB storage manager, it attempts to compact
** one table.
**
** Each call advances p->compact_cursor by one entry of the tables database
** and compacts the table named by that entry.
**
** If the incremental vacuum is finished after this function has run,
** SQLITE_DONE is returned. If it is not finished, but no error occurred,
** SQLITE_OK is returned. Otherwise an SQLite error code.
**
** The caller can get and accumulate the number of truncated pages with
** the input parameter truncatedPages (may be NULL). Also, btreeIncrVacuum
** skips the vacuum of the current table if enough pages have already been
** truncated, as an optimization.
*/
int btreeIncrVacuum(Btree *p, u_int32_t *truncatedPages)
{
	BtShared *pBt;
	CACHED_DB *cached_db;
	DB *dbp;
	DBT key, data;
	char *fileName, *tableName, tableNameBuf[DBNAME_SIZE];
	void *app;
	int iTable, rc, ret, t_ret;
	u_int32_t was_create;
	DB_COMPACT compact_data;
	DBT *pStart, end;	/* start/end of db_compact() */
	struct VacuumInfo *pInfo;
	int vacuumMode;

	assert(p->pBt->dbStorage == DB_STORE_NAMED);

	if (!p->connected && (rc = btreeOpenEnvironment(p, 1)) != SQLITE_OK)
		return rc;

	pBt = p->pBt;
	rc = SQLITE_OK;
	ret = 0;
	cached_db = NULL;
	dbp = NULL;
	memset(&end, 0, sizeof(end));
#ifndef BDBSQL_OMIT_LEAKCHECK
	/* Let BDB use the user-specified malloc function (btreeMalloc) */
	end.flags |= DB_DBT_MALLOC;
#endif

	/*
	 * Turn off DB_CREATE: we don't want to create any tables that don't
	 * already exist.
	 */
	was_create = (pBt->db_oflags & DB_CREATE);
	pBt->db_oflags &= ~DB_CREATE;

	memset(&key, 0, sizeof(key));
	key.data = tableNameBuf;
	/*
	 * Reserve one byte of the buffer so that the NUL terminator written
	 * at tableNameBuf[key.size] below always stays inside the array.
	 */
	key.ulen = sizeof(tableNameBuf) - 1;
	key.flags = DB_DBT_USERMEM;
	/* Only the key is wanted: a zero-length partial get skips the data. */
	memset(&data, 0, sizeof(data));
	data.flags = DB_DBT_PARTIAL | DB_DBT_USERMEM;

	UPDATE_DURING_BACKUP(p);

	if (p->compact_cursor == NULL) {
		if ((ret = pTablesDb->cursor(pTablesDb, pReadTxn,
		    &p->compact_cursor, 0)) != 0)
			goto err;
	}
	if ((ret = p->compact_cursor->get(p->compact_cursor,
	    &key, &data, DB_NEXT)) == DB_NOTFOUND) {
		/* No more tables: the vacuum pass is complete. */
		(void)p->compact_cursor->close(p->compact_cursor);
		p->compact_cursor = NULL;
		pBt->db_oflags |= was_create;
		return SQLITE_DONE;
	} else if (ret != 0)
		goto err;

	tableNameBuf[key.size] = '\0';
	if (strncmp(tableNameBuf, "table", 5) != 0) {
		iTable = 0;
#ifdef BDBSQL_FILE_PER_TABLE
		/* Cannot compact the metadata file */
		goto err;
#endif

		/* Open a DB handle on that table. */
		if ((ret = db_create(&dbp, pDbEnv, 0)) != 0)
			goto err;
		if (pBt->encrypted &&
		    (ret = dbp->set_flags(dbp, DB_ENCRYPT)) != 0)
			goto err;

		tableName = tableNameBuf;
		FIX_TABLENAME(pBt, fileName, tableName);

		/*
		 * We know we're not creating this table, open it using the
		 * family transaction because that keeps the dbreg records out
		 * of the vacuum transaction, reducing pressure on the log
		 * region (since we copy the filename of every open DB handle
		 * into the log region).
		 */
		if ((ret = dbp->open(dbp, pFamilyTxn, fileName, tableName,
		    DB_BTREE, GET_AUTO_COMMIT(pBt, pFamilyTxn), 0)) != 0)
			goto err;
	} else {
		if ((ret = btreeTableNameToId(tableNameBuf,
		    key.size, &iTable)) != 0)
			goto err;

		/* Try to retrieve the matching handle from the cache. */
		rc = btreeFindOrCreateDataTable(p, &iTable, &cached_db, 0);
		if (rc != SQLITE_OK)
			goto err;
		assert(cached_db != NULL && cached_db->dbp != NULL);

		dbp = cached_db->dbp;
		if ((iTable & 1) == 0) {
			/*
			 * Attach the DB handle to a SQLite index, required for
			 * the key comparator to work correctly.  If we can't
			 * find an Index struct, just skip this database.  It
			 * may not be open yet (c.f. whereA-1.7).
			 */
#ifdef BDBSQL_SINGLE_THREAD
			rc = btreeGetKeyInfo(p, iTable,
			    (KeyInfo **)&(dbp->app_private));
#else
			rc = btreeGetKeyInfo(p, iTable,
			    &((TableInfo *)dbp->app_private)->pKeyInfo);
#endif
			if (rc != SQLITE_OK)
				goto err;
		}
	}

	/*
	 * In following db_compact, we use the family transaction because
	 * DB->compact will then auto-commit, and it has built-in smarts
	 * about retrying on deadlock.
	 */
	/* Setup compact_data as configured */
	memset(&compact_data, 0, sizeof(compact_data));
	compact_data.compact_fillpercent = p->fillPercent;

	vacuumMode = sqlite3BtreeGetAutoVacuum(p);
	if (vacuumMode == BTREE_AUTOVACUUM_NONE) {
		ret = dbp->compact(dbp, pFamilyTxn,
		    NULL, NULL, &compact_data, DB_FREE_SPACE, NULL);
	/* Skip current table if we have truncated enough pages */
	} else if (truncatedPages == NULL ||
	    *truncatedPages < p->vacuumPages) {
		/* Find DBT for db_compact start */
		for (pInfo = p->vacuumInfo, pStart = NULL;
		     pInfo != NULL; pInfo = pInfo->next) {
			if (pInfo->iTable == iTable)
				break;
		}

		/* Create new VacuumInfo for current iTable as needed */
		if (pInfo == NULL) {
			/* Create info for current iTable */
			if ((pInfo = (struct VacuumInfo *)sqlite3_malloc(
			    sizeof(struct VacuumInfo))) == NULL) {
				rc = SQLITE_NOMEM;
				goto err;
			}
			memset(pInfo, 0, sizeof(struct VacuumInfo));
			pInfo->iTable = iTable;
			pInfo->next = p->vacuumInfo;
			p->vacuumInfo = pInfo;
		}
		pStart = &(pInfo->start);

		/* Do page compact for IncrVacuum */
		if (vacuumMode == BTREE_AUTOVACUUM_INCR) {
			/* Do compact with given arguments */
			compact_data.compact_pages = p->vacuumPages;
			if ((ret = dbp->compact(dbp, pFamilyTxn,
				(pStart->data == NULL) ? NULL : pStart,
				NULL, &compact_data, 0, &end)) != 0)
				goto err;

			/*
			 * Save current vacuum position: ownership of the
			 * buffer in "end" moves to pStart, so "end" is
			 * cleared to keep the error path from freeing it.
			 */
			if (pStart->data != NULL)
				sqlite3_free(pStart->data);
			memcpy(pStart, &end, sizeof(DBT));
			memset(&end, 0, sizeof(end));

			/* Rewind to start if we reach the end of subdb */
			if (compact_data.compact_pages_free < p->vacuumPages ||
			    p->vacuumPages == 0) {
				if (pStart->data != NULL)
					sqlite3_free(pStart->data);
				memset(pStart, 0, sizeof(DBT));
			}
		}
		/* Because of the one-pass nature of the compaction algorithm,
		 * any unemptied page near the end of the file inhibits
		 * returning pages to the file system.
		 * A repeated call to the DB->compact() method with a low
		 * compact_fillpercent may be used to return pages in this case.
		 */
		memset(&compact_data, 0, sizeof(compact_data));
		compact_data.compact_fillpercent = 1;
		if ((ret = dbp->compact(dbp, pFamilyTxn, NULL, NULL,
			    &compact_data, DB_FREE_SPACE, NULL)) != 0)
			goto err;
		/*
		 * NOTE(review): the "> 0" guard means a counter that starts
		 * at zero is never incremented here -- confirm against the
		 * callers whether that is intended.
		 */
		if (truncatedPages != NULL && *truncatedPages > 0)
			*truncatedPages += compact_data.compact_pages_truncated;
	}

err:	/* Free cursor and DBT if run into error */
	/*
	 * NOTE(review): only a non-zero BDB code (ret) triggers this cleanup;
	 * paths that arrive with rc != SQLITE_OK and ret == 0 keep the cursor
	 * and vacuum info -- confirm that this is intended.
	 */
	if (ret != 0) {
		if (p->compact_cursor != NULL) {
			(void)p->compact_cursor->close(p->compact_cursor);
			p->compact_cursor = NULL;
		}
		if (end.data != NULL)
			sqlite3_free(end.data);
		btreeFreeVacuumInfo(p);
	}

	if (cached_db != NULL) {
		/* Cached handle: keep it open, only release the key info. */
#ifdef BDBSQL_SINGLE_THREAD
		if ((app = dbp->app_private) != NULL)
			sqlite3DbFree(p->db, app);
#else
		if (dbp->app_private != NULL &&
		    (app = ((TableInfo *)dbp->app_private)->pKeyInfo) != NULL) {
			sqlite3DbFree(p->db, app);
			((TableInfo *)dbp->app_private)->pKeyInfo = NULL;
		}
#endif
	} else if (dbp != NULL) {
		/* Handle opened above just for this call: close it. */
		app = dbp->app_private;
		if ((t_ret = dbp->close(dbp, DB_NOSYNC)) != 0 && ret == 0)
			ret = t_ret;
		if (app != NULL)
			sqlite3DbFree(p->db, app);
	}

	/* Restore DB_CREATE if it was set on entry. */
	pBt->db_oflags |= was_create;

	return MAP_ERR(rc, ret, p);
}