Example #1
0
/* Since we opened it, these shouldn't fail unless it's recent corruption. */
static bool tdb_check_header(struct tdb_context *tdb, tdb_off_t *recovery)
{
	struct tdb_header hdr;

	if (tdb->methods->tdb_read(tdb, 0, &hdr, sizeof(hdr), DOCONV()) == -1)
		return false;
	if (strcmp(hdr.magic_food, TDB_MAGIC_FOOD) != 0)
		goto corrupt;

	CONVERT(hdr);
	if (hdr.version != TDB_VERSION)
		goto corrupt;

	if (hdr.rwlocks != 0)
		goto corrupt;

	if (hdr.hash_size == 0)
		goto corrupt;

	if (hdr.hash_size != tdb->header.hash_size)
		goto corrupt;

	if (hdr.recovery_start != 0 &&
	    hdr.recovery_start < TDB_DATA_START(tdb->header.hash_size))
		goto corrupt;

	*recovery = hdr.recovery_start;
	return true;

corrupt:
	tdb->ecode = TDB_ERR_CORRUPT;
	TDB_LOG((tdb, TDB_DEBUG_ERROR, "Header is corrupt\n"));
	return false;
}
Example #2
0
File: mutex.c Project: hef/samba
/*
 * Get the index for a chain mutex
 */
static bool tdb_mutex_index(struct tdb_context *tdb, off_t off, off_t len,
			    unsigned *idx)
{
	/*
	 * Weird but true: We fcntl lock 1 byte at an offset 4 bytes before
	 * the 4 bytes of the freelist start and the hash chain that is about
	 * to be locked. See lock_offset() where the freelist is -1 vs the
	 * "+1" in TDB_HASH_TOP(). Because the mutex array is represented in
	 * the tdb file itself as data, we need to adjust the offset here.
	 */
	const off_t freelist_lock_ofs = FREELIST_TOP - sizeof(tdb_off_t);

	if (!tdb_have_mutexes(tdb)) {
		return false;
	}
	if (len != 1) {
		/* Possibly the allrecord lock */
		return false;
	}
	if (off < freelist_lock_ofs) {
		/* One of the special locks */
		return false;
	}
	if (tdb->hash_size == 0) {
		/* tdb not initialized yet, called from tdb_open_ex() */
		return false;
	}
	if (off >= TDB_DATA_START(tdb->hash_size)) {
		/* Single record lock from traverses */
		return false;
	}

	/*
	 * Now we know it's a freelist or hash chain lock. Those are always 4
	 * byte aligned. Paranoia check.
	 */
	if ((off % sizeof(tdb_off_t)) != 0) {
		abort();
	}

	/*
	 * Re-index the fcntl offset into an offset into the mutex array
	 */
	off -= freelist_lock_ofs; /* rebase to index 0 */
	off /= sizeof(tdb_off_t); /* 0 for freelist 1-n for hashchain */

	*idx = off;
	return true;
}
Example #3
0
/* Since we opened it, these shouldn't fail unless it's recent corruption. */
static bool tdb_check_header(struct tdb_context *tdb, tdb_off_t *recovery)
{
	struct tdb_header hdr;
	uint32_t h1, h2;

	if (tdb->methods->tdb_read(tdb, 0, &hdr, sizeof(hdr), 0) == -1)
		return false;
	if (strcmp(hdr.magic_food, TDB_MAGIC_FOOD) != 0)
		goto corrupt;

	CONVERT(hdr);
	if (hdr.version != TDB_VERSION)
		goto corrupt;

	if (hdr.rwlocks != 0 &&
	    hdr.rwlocks != TDB_FEATURE_FLAG_MAGIC &&
	    hdr.rwlocks != TDB_HASH_RWLOCK_MAGIC)
		goto corrupt;

	tdb_header_hash(tdb, &h1, &h2);
	if (hdr.magic1_hash && hdr.magic2_hash &&
	    (hdr.magic1_hash != h1 || hdr.magic2_hash != h2))
		goto corrupt;

	if (hdr.hash_size == 0)
		goto corrupt;

	if (hdr.hash_size != tdb->hash_size)
		goto corrupt;

	if (hdr.recovery_start != 0 &&
	    hdr.recovery_start < TDB_DATA_START(tdb->hash_size))
		goto corrupt;

	*recovery = hdr.recovery_start;
	return true;

corrupt:
	tdb->ecode = TDB_ERR_CORRUPT;
	TDB_LOG((tdb, TDB_DEBUG_ERROR, "Header is corrupt\n"));
	return false;
}
Example #4
0
/* Generic record header check. */
static bool tdb_check_record(struct tdb_context *tdb,
			     tdb_off_t off,
			     const struct tdb_record *rec)
{
	tdb_off_t tailer;

	/* Check rec->next: 0 or points to record offset, aligned. */
	if (rec->next > 0 && rec->next < TDB_DATA_START(tdb->hash_size)){
		TDB_LOG((tdb, TDB_DEBUG_ERROR,
			 "Record offset %d too small next %d\n",
			 off, rec->next));
		goto corrupt;
	}
	if (rec->next + sizeof(*rec) < rec->next) {
		TDB_LOG((tdb, TDB_DEBUG_ERROR,
			 "Record offset %d too large next %d\n",
			 off, rec->next));
		goto corrupt;
	}
	if ((rec->next % TDB_ALIGNMENT) != 0) {
		TDB_LOG((tdb, TDB_DEBUG_ERROR,
			 "Record offset %d misaligned next %d\n",
			 off, rec->next));
		goto corrupt;
	}
	if (tdb->methods->tdb_oob(tdb, rec->next, sizeof(*rec), 0))
		goto corrupt;

	/* Check rec_len: similar to rec->next, implies next record. */
	if ((rec->rec_len % TDB_ALIGNMENT) != 0) {
		TDB_LOG((tdb, TDB_DEBUG_ERROR,
			 "Record offset %d misaligned length %d\n",
			 off, rec->rec_len));
		goto corrupt;
	}
	/* Must fit tailer. */
	if (rec->rec_len < sizeof(tailer)) {
		TDB_LOG((tdb, TDB_DEBUG_ERROR,
			 "Record offset %d too short length %d\n",
			 off, rec->rec_len));
		goto corrupt;
	}
	/* OOB allows "right at the end" access, so this works for last rec. */
	if (tdb->methods->tdb_oob(tdb, off, sizeof(*rec)+rec->rec_len, 0))
		goto corrupt;

	/* Check tailer. */
	if (tdb_ofs_read(tdb, off+sizeof(*rec)+rec->rec_len-sizeof(tailer),
			 &tailer) == -1)
		goto corrupt;
	if (tailer != sizeof(*rec) + rec->rec_len) {
		TDB_LOG((tdb, TDB_DEBUG_ERROR,
			 "Record offset %d invalid tailer\n", off));
		goto corrupt;
	}

	return true;

corrupt:
	tdb->ecode = TDB_ERR_CORRUPT;
	return false;
}
Example #5
0
_PUBLIC_ int tdb_check(struct tdb_context *tdb,
	      int (*check)(TDB_DATA key, TDB_DATA data, void *private_data),
	      void *private_data)
{
	unsigned int h;
	unsigned char **hashes;
	tdb_off_t off, recovery_start;
	struct tdb_record rec;
	bool found_recovery = false;
	tdb_len_t dead;
	bool locked;

	/* Read-only databases use no locking at all: it's best-effort.
	 * We may have a write lock already, so skip that case too. */
	if (tdb->read_only || tdb->allrecord_lock.count != 0) {
		locked = false;
	} else {
		if (tdb_lockall_read(tdb) == -1)
			return -1;
		locked = true;
	}

	/* Make sure we know true size of the underlying file. */
	tdb->methods->tdb_oob(tdb, tdb->map_size, 1, 1);

	/* Header must be OK: also gets us the recovery ptr, if any. */
	if (!tdb_check_header(tdb, &recovery_start))
		goto unlock;

	/* We should have the whole header, too. */
	if (tdb->map_size < TDB_DATA_START(tdb->hash_size)) {
		tdb->ecode = TDB_ERR_CORRUPT;
		TDB_LOG((tdb, TDB_DEBUG_ERROR, "File too short for hashes\n"));
		goto unlock;
	}

	/* One big malloc: pointers then bit arrays. */
	hashes = (unsigned char **)calloc(
			1, sizeof(hashes[0]) * (1+tdb->hash_size)
			+ BITMAP_BITS / CHAR_BIT * (1+tdb->hash_size));
	if (!hashes) {
		tdb->ecode = TDB_ERR_OOM;
		goto unlock;
	}

	/* Initialize pointers */
	hashes[0] = (unsigned char *)(&hashes[1+tdb->hash_size]);
	for (h = 1; h < 1+tdb->hash_size; h++)
		hashes[h] = hashes[h-1] + BITMAP_BITS / CHAR_BIT;

	/* Freelist and hash headers are all in a row: read them. */
	for (h = 0; h < 1+tdb->hash_size; h++) {
		if (tdb_ofs_read(tdb, FREELIST_TOP + h*sizeof(tdb_off_t),
				 &off) == -1)
			goto free;
		if (off)
			record_offset(hashes[h], off);
	}

	/* For each record, read it in and check it's ok. */
	for (off = TDB_DATA_START(tdb->hash_size);
	     off < tdb->map_size;
	     off += sizeof(rec) + rec.rec_len) {
		if (tdb->methods->tdb_read(tdb, off, &rec, sizeof(rec),
					   DOCONV()) == -1)
			goto free;
		switch (rec.magic) {
		case TDB_MAGIC:
		case TDB_DEAD_MAGIC:
			if (!tdb_check_used_record(tdb, off, &rec, hashes,
						   check, private_data))
				goto free;
			break;
		case TDB_FREE_MAGIC:
			if (!tdb_check_free_record(tdb, off, &rec, hashes))
				goto free;
			break;
		/* If we crash after ftruncate, we can get zeroes or fill. */
		case TDB_RECOVERY_INVALID_MAGIC:
		case 0x42424242:
			if (recovery_start == off) {
				found_recovery = true;
				break;
			}
			dead = tdb_dead_space(tdb, off);
			if (dead < sizeof(rec))
				goto corrupt;

			TDB_LOG((tdb, TDB_DEBUG_ERROR,
				 "Dead space at %d-%d (of %u)\n",
				 off, off + dead, tdb->map_size));
			rec.rec_len = dead - sizeof(rec);
			break;
		case TDB_RECOVERY_MAGIC:
			if (recovery_start != off) {
				TDB_LOG((tdb, TDB_DEBUG_ERROR,
					 "Unexpected recovery record at offset %d\n",
					 off));
				goto free;
			}
			found_recovery = true;
			break;
		default: ;
		corrupt:
			tdb->ecode = TDB_ERR_CORRUPT;
			TDB_LOG((tdb, TDB_DEBUG_ERROR,
				 "Bad magic 0x%x at offset %d\n",
				 rec.magic, off));
			goto free;
		}
	}

	/* Now, hashes should all be empty: each record exists and is referred
	 * to by one other. */
	for (h = 0; h < 1+tdb->hash_size; h++) {
		unsigned int i;
		for (i = 0; i < BITMAP_BITS / CHAR_BIT; i++) {
			if (hashes[h][i] != 0) {
				tdb->ecode = TDB_ERR_CORRUPT;
				TDB_LOG((tdb, TDB_DEBUG_ERROR,
					 "Hashes do not match records\n"));
				goto free;
			}
		}
	}

	/* We must have found recovery area if there was one. */
	if (recovery_start != 0 && !found_recovery) {
		TDB_LOG((tdb, TDB_DEBUG_ERROR,
			 "Expected a recovery area at %u\n",
			 recovery_start));
		goto free;
	}

	free(hashes);
	if (locked) {
		tdb_unlockall_read(tdb);
	}
	return 0;

free:
	free(hashes);
unlock:
	if (locked) {
		tdb_unlockall_read(tdb);
	}
	return -1;
}
Example #6
0
/* Add an element into the freelist. Merge adjacent records if
   necessary. */
int tdb_free(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *rec)
{
	/* Allocation and tailer lock */
	if (tdb_lock(tdb, -1, F_WRLCK) != 0)
		return -1;

	/* set an initial tailer, so if we fail we don't leave a bogus record */
	if (update_tailer(tdb, offset, rec) != 0) {
		TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: update_tailer failed!\n"));
		goto fail;
	}

#if USE_RIGHT_MERGES
	/* Look right first (I'm an Australian, dammit) */
	if (offset + sizeof(*rec) + rec->rec_len + sizeof(*rec) <= tdb->map_size) {
		tdb_off_t right = offset + sizeof(*rec) + rec->rec_len;
		struct tdb_record r;

		if (tdb->methods->tdb_read(tdb, right, &r, sizeof(r), DOCONV()) == -1) {
			TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: right read failed at %u\n", right));
			goto left;
		}

		/* If it's free, expand to include it. */
		if (r.magic == TDB_FREE_MAGIC) {
			if (remove_from_freelist(tdb, right, r.next) == -1) {
				TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: right free failed at %u\n", right));
				goto left;
			}
			rec->rec_len += sizeof(r) + r.rec_len;
			if (update_tailer(tdb, offset, rec) == -1) {
				TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: update_tailer failed at %u\n", offset));
				goto fail;
			}
		}
	}
left:
#endif

	/* Look left */
	if (offset - sizeof(tdb_off_t) > TDB_DATA_START(tdb->header.hash_size)) {
		tdb_off_t left = offset - sizeof(tdb_off_t);
		struct tdb_record l;
		tdb_off_t leftsize;
		
		/* Read in tailer and jump back to header */
		if (tdb_ofs_read(tdb, left, &leftsize) == -1) {
			TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: left offset read failed at %u\n", left));
			goto update;
		}

		/* it could be uninitialised data */
		if (leftsize == 0 || leftsize == TDB_PAD_U32) {
			goto update;
		}

		left = offset - leftsize;

		if (leftsize > offset ||
		    left < TDB_DATA_START(tdb->header.hash_size)) {
			goto update;
		}

		/* Now read in the left record */
		if (tdb->methods->tdb_read(tdb, left, &l, sizeof(l), DOCONV()) == -1) {
			TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: left read failed at %u (%u)\n", left, leftsize));
			goto update;
		}

		/* If it's free, expand to include it. */
		if (l.magic == TDB_FREE_MAGIC) {
			/* we now merge the new record into the left record, rather than the other 
			   way around. This makes the operation O(1) instead of O(n). This change
			   prevents traverse from being O(n^2) after a lot of deletes */
			l.rec_len += sizeof(*rec) + rec->rec_len;
			if (tdb_rec_write(tdb, left, &l) == -1) {
				TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: update_left failed at %u\n", left));
				goto fail;
			}
			if (update_tailer(tdb, left, &l) == -1) {
				TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: update_tailer failed at %u\n", offset));
				goto fail;
			}
			tdb_unlock(tdb, -1, F_WRLCK);
			return 0;
		}
	}

update:

	/* Now, prepend to free list */
	rec->magic = TDB_FREE_MAGIC;

	if (tdb_ofs_read(tdb, FREELIST_TOP, &rec->next) == -1 ||
	    tdb_rec_write(tdb, offset, rec) == -1 ||
	    tdb_ofs_write(tdb, FREELIST_TOP, &offset) == -1) {
		TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free record write failed at offset=%d\n", offset));
		goto fail;
	}

	/* And we're done. */
	tdb_unlock(tdb, -1, F_WRLCK);
	return 0;

 fail:
	tdb_unlock(tdb, -1, F_WRLCK);
	return -1;
}
Example #7
0
int tdb_check(struct tdb_context *tdb,
	      int (*check)(TDB_DATA key, TDB_DATA data, void *private_data),
	      void *private_data)
{
	unsigned int h;
	unsigned char **hashes;
	tdb_off_t off, recovery_start;
	struct tdb_record rec;
	bool found_recovery = false;

	if (tdb_lockall(tdb) == -1)
		return -1;

	/* Make sure we know true size of the underlying file. */
	tdb->methods->tdb_oob(tdb, tdb->map_size + 1, 1);

	/* Header must be OK: also gets us the recovery ptr, if any. */
	if (!tdb_check_header(tdb, &recovery_start))
		goto unlock;

	/* We should have the whole header, too. */
	if (tdb->map_size < TDB_DATA_START(tdb->header.hash_size)) {
		tdb->ecode = TDB_ERR_CORRUPT;
		TDB_LOG((tdb, TDB_DEBUG_ERROR, "File too short for hashes\n"));
		goto unlock;
	}

	/* One big malloc: pointers then bit arrays. */
	hashes = (unsigned char **)calloc(
			1, sizeof(hashes[0]) * (1+tdb->header.hash_size)
			+ BITMAP_BITS / CHAR_BIT * (1+tdb->header.hash_size));
	if (!hashes) {
		tdb->ecode = TDB_ERR_OOM;
		goto unlock;
	}

	/* Initialize pointers */
	hashes[0] = (unsigned char *)(&hashes[1+tdb->header.hash_size]);
	for (h = 1; h < 1+tdb->header.hash_size; h++)
		hashes[h] = hashes[h-1] + BITMAP_BITS / CHAR_BIT;

	/* Freelist and hash headers are all in a row: read them. */
	for (h = 0; h < 1+tdb->header.hash_size; h++) {
		if (tdb_ofs_read(tdb, FREELIST_TOP + h*sizeof(tdb_off_t),
				 &off) == -1)
			goto free;
		if (off)
			record_offset(hashes[h], off);
	}

	/* For each record, read it in and check it's ok. */
	for (off = TDB_DATA_START(tdb->header.hash_size);
	     off < tdb->map_size;
	     off += sizeof(rec) + rec.rec_len) {
		if (tdb->methods->tdb_read(tdb, off, &rec, sizeof(rec),
					   DOCONV()) == -1)
			goto free;
		switch (rec.magic) {
		case TDB_MAGIC:
		case TDB_DEAD_MAGIC:
			if (!tdb_check_used_record(tdb, off, &rec, hashes,
						   check, private_data))
				goto free;
			break;
		case TDB_FREE_MAGIC:
			if (!tdb_check_free_record(tdb, off, &rec, hashes))
				goto free;
			break;
		case TDB_RECOVERY_MAGIC:
		case 0: /* Used for invalid (or in-progress) recovery area. */
			if (recovery_start != off) {
				TDB_LOG((tdb, TDB_DEBUG_ERROR,
					 "Unexpected recovery record at offset %d\n",
					 off));
				goto free;
			}
			found_recovery = true;
			break;
		default:
			tdb->ecode = TDB_ERR_CORRUPT;
			TDB_LOG((tdb, TDB_DEBUG_ERROR,
				 "Bad magic 0x%x at offset %d\n",
				 rec.magic, off));
			goto free;
		}
	}

	/* Now, hashes should all be empty: each record exists and is referred
	 * to by one other. */
	for (h = 0; h < 1+tdb->header.hash_size; h++) {
		unsigned int i;
		for (i = 0; i < BITMAP_BITS / CHAR_BIT; i++) {
			if (hashes[h][i] != 0) {
				tdb->ecode = TDB_ERR_CORRUPT;
				TDB_LOG((tdb, TDB_DEBUG_ERROR,
					 "Hashes do not match records\n"));
				goto free;
			}
		}
	}

	/* We must have found recovery area if there was one. */
	if (recovery_start != 0 && !found_recovery) {
		TDB_LOG((tdb, TDB_DEBUG_ERROR,
			 "Expected %s recovery area, got %s\n",
			 recovery_start ? "a" : "no",
			 found_recovery ? "one" : "none"));
		goto free;
	}

	free(hashes);
	tdb_unlockall(tdb);
	return 0;

free:
	free(hashes);
unlock:
	tdb_unlockall(tdb);
	return -1;
}
Example #8
0
_PUBLIC_ char *tdb_summary(struct tdb_context *tdb)
{
	off_t file_size;
	tdb_off_t off, rec_off;
	struct tally freet, keys, data, dead, extra, hashval, uncoal;
	struct tdb_record rec;
	char *ret = NULL;
	bool locked;
	size_t unc = 0;
	int len;
	struct tdb_record recovery;

	/* Read-only databases use no locking at all: it's best-effort.
	 * We may have a write lock already, so skip that case too. */
	if (tdb->read_only || tdb->allrecord_lock.count != 0) {
		locked = false;
	} else {
		if (tdb_lockall_read(tdb) == -1)
			return NULL;
		locked = true;
	}

	if (tdb_recovery_area(tdb, tdb->methods, &rec_off, &recovery) != 0) {
		goto unlock;
	}

	tally_init(&freet);
	tally_init(&keys);
	tally_init(&data);
	tally_init(&dead);
	tally_init(&extra);
	tally_init(&hashval);
	tally_init(&uncoal);

	for (off = TDB_DATA_START(tdb->hash_size);
	     off < tdb->map_size - 1;
	     off += sizeof(rec) + rec.rec_len) {
		if (tdb->methods->tdb_read(tdb, off, &rec, sizeof(rec),
					   DOCONV()) == -1)
			goto unlock;
		switch (rec.magic) {
		case TDB_MAGIC:
			tally_add(&keys, rec.key_len);
			tally_add(&data, rec.data_len);
			tally_add(&extra, rec.rec_len - (rec.key_len
							 + rec.data_len));
			if (unc > 1)
				tally_add(&uncoal, unc - 1);
			unc = 0;
			break;
		case TDB_FREE_MAGIC:
			tally_add(&freet, rec.rec_len);
			unc++;
			break;
		/* If we crash after ftruncate, we can get zeroes or fill. */
		case TDB_RECOVERY_INVALID_MAGIC:
		case 0x42424242:
			unc++;
			/* If it's a valid recovery, we can trust rec_len. */
			if (off != rec_off) {
				rec.rec_len = tdb_dead_space(tdb, off)
					- sizeof(rec);
			}
			/* Fall through */
		case TDB_DEAD_MAGIC:
			tally_add(&dead, rec.rec_len);
			break;
		default:
			TDB_LOG((tdb, TDB_DEBUG_ERROR,
				 "Unexpected record magic 0x%x at offset %u\n",
				 rec.magic, off));
			goto unlock;
		}
	}
	if (unc > 1)
		tally_add(&uncoal, unc - 1);

	for (off = 0; off < tdb->hash_size; off++)
		tally_add(&hashval, get_hash_length(tdb, off));

	file_size = tdb->hdr_ofs + tdb->map_size;

	len = asprintf(&ret, SUMMARY_FORMAT,
		 (unsigned long long)file_size, keys.total+data.total,
		 (size_t)tdb->hdr_ofs, (size_t)tdb->map_size,
		 keys.num,
		 (tdb->hash_fn == tdb_jenkins_hash)?"yes":"no",
		 (unsigned)tdb->feature_flags, TDB_SUPPORTED_FEATURE_FLAGS,
		 (tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX)?"yes":"no",
		 keys.min, tally_mean(&keys), keys.max,
		 data.min, tally_mean(&data), data.max,
		 extra.min, tally_mean(&extra), extra.max,
		 dead.num,
		 dead.min, tally_mean(&dead), dead.max,
		 freet.num,
		 freet.min, tally_mean(&freet), freet.max,
		 hashval.num,
		 hashval.min, tally_mean(&hashval), hashval.max,
		 uncoal.total,
		 uncoal.min, tally_mean(&uncoal), uncoal.max,
		 keys.total * 100.0 / file_size,
		 data.total * 100.0 / file_size,
		 extra.total * 100.0 / file_size,
		 freet.total * 100.0 / file_size,
		 dead.total * 100.0 / file_size,
		 (keys.num + freet.num + dead.num)
		 * (sizeof(struct tdb_record) + sizeof(uint32_t))
		 * 100.0 / file_size,
		 tdb->hash_size * sizeof(tdb_off_t)
		 * 100.0 / file_size);
	if (len == -1) {
		goto unlock;
	}

unlock:
	if (locked) {
		tdb_unlockall_read(tdb);
	}
	return ret;
}
Example #9
0
File: tdb.c Project: gojdic/samba
/*
  wipe the entire database, deleting all records. This can be done
  very fast by using a global lock. The entire data portion of the
  file becomes a single entry in the freelist.

  This code carefully steps around the recovery area, leaving it alone
 */
int tdb_wipe_all(struct tdb_context *tdb)
{
	int i;
	tdb_off_t offset = 0;
	ssize_t data_len;
	tdb_off_t recovery_head;
	tdb_len_t recovery_size = 0;

	if (tdb_lockall(tdb) != 0) {
		return -1;
	}

	/* see if the tdb has a recovery area, and remember its size
	   if so. We don't want to lose this as otherwise each
	   tdb_wipe_all() in a transaction will increase the size of
	   the tdb by the size of the recovery area */
	if (tdb_ofs_read(tdb, TDB_RECOVERY_HEAD, &recovery_head) == -1) {
		TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_wipe_all: failed to read recovery head\n"));
		goto failed;
	}

	if (recovery_head != 0) {
		struct list_struct rec;
		if (tdb->methods->tdb_read(tdb, recovery_head, &rec, sizeof(rec), DOCONV()) == -1) {
			TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_wipe_all: failed to read recovery record\n"));
			return -1;
		}	
		recovery_size = rec.rec_len + sizeof(rec);
	}

	/* wipe the hashes */
	for (i=0;i<tdb->header.hash_size;i++) {
		if (tdb_ofs_write(tdb, TDB_HASH_TOP(i), &offset) == -1) {
			TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write hash %d\n", i));
			goto failed;
		}
	}

	/* wipe the freelist */
	if (tdb_ofs_write(tdb, FREELIST_TOP, &offset) == -1) {
		TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write freelist\n"));
		goto failed;
	}

	/* add all the rest of the file to the freelist, possibly leaving a gap 
	   for the recovery area */
	if (recovery_size == 0) {
		/* the simple case - the whole file can be used as a freelist */
		data_len = (tdb->map_size - TDB_DATA_START(tdb->header.hash_size));
		if (tdb_free_region(tdb, TDB_DATA_START(tdb->header.hash_size), data_len) != 0) {
			goto failed;
		}
	} else {
		/* we need to add two freelist entries - one on either
		   side of the recovery area 

		   Note that we cannot shift the recovery area during
		   this operation. Only the transaction.c code may
		   move the recovery area or we risk subtle data
		   corruption
		*/
		data_len = (recovery_head - TDB_DATA_START(tdb->header.hash_size));
		if (tdb_free_region(tdb, TDB_DATA_START(tdb->header.hash_size), data_len) != 0) {
			goto failed;
		}
		/* and the 2nd free list entry after the recovery area - if any */
		data_len = tdb->map_size - (recovery_head+recovery_size);
		if (tdb_free_region(tdb, recovery_head+recovery_size, data_len) != 0) {
			goto failed;
		}
	}

	if (tdb_unlockall(tdb) != 0) {
		TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to unlock\n"));
		goto failed;
	}

	return 0;

failed:
	tdb_unlockall(tdb);
	return -1;
}