Beispiel #1
0
/*
 * Read a freelist record header at `off` into `rec` and check it for
 * simple errors.
 *
 * A record that still carries TDB1_MAGIC is not treated as fatal: a warning
 * is logged, the magic is changed to TDB1_FREE_MAGIC and the header is
 * written back at `off`.
 *
 * Returns 0 on success.  Returns -1 if the read or the repair write fails,
 * if the magic is not TDB1_FREE_MAGIC after the repair attempt (this also
 * sets tdb->last_error), or if the tdb1_oob() check on
 * rec->next + sizeof(*rec) fails.
 */
int tdb1_rec_free_read(struct tdb_context *tdb, tdb1_off_t off, struct tdb1_record *rec)
{
	if (tdb->tdb1.io->tdb1_read(tdb, off, rec, sizeof(*rec),TDB1_DOCONV()) == -1)
		return -1;

	if (rec->magic == TDB1_MAGIC) {
		/* this happens when an app is shut down while deleting a record - we
		   should not completely fail when this happens */
		tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_WARNING,
			   "tdb1_rec_free_read non-free magic 0x%x at offset=%u - fixing\n",
			   rec->magic, off);
		rec->magic = TDB1_FREE_MAGIC;
		/* NOTE(review): the header was read with TDB1_DOCONV() but is
		   written back with the raw tdb1_write; tdb1_free() writes
		   headers through tdb1_rec_write() instead.  Confirm this is
		   correct for databases that need byte-order conversion. */
		if (tdb->tdb1.io->tdb1_write(tdb, off, rec, sizeof(*rec)) == -1)
			return -1;
	}

	if (rec->magic != TDB1_FREE_MAGIC) {
		tdb->last_error = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
					"tdb1_rec_free_read bad magic 0x%x at offset=%u\n",
					rec->magic, off);
		return -1;
	}
	/* the next pointer plus a header must pass the io layer's
	   out-of-bounds check */
	if (tdb->tdb1.io->tdb1_oob(tdb, rec->next+sizeof(*rec), 0) != 0)
		return -1;
	return 0;
}
Beispiel #2
0
/*
  wipe the entire database, deleting all records. This can be done
  very fast by using an allrecord lock. The entire data portion of the
  file becomes a single entry in the freelist.

  This code carefully steps around the recovery area, leaving it alone

  Returns 0 on success and -1 on failure.  The lock taken by
  tdb_lockall() is released on every path after it has been acquired.
 */
int tdb1_wipe_all(struct tdb_context *tdb)
{
	int i;
	tdb1_off_t offset = 0;
	ssize_t data_len;
	tdb1_off_t recovery_head;
	tdb1_len_t recovery_size = 0;

	if (tdb_lockall(tdb) != TDB_SUCCESS) {
		return -1;
	}


	/* see if the tdb has a recovery area, and remember its size
	   if so. We don't want to lose this as otherwise each
	   tdb1_wipe_all() in a transaction will increase the size of
	   the tdb by the size of the recovery area */
	if (tdb1_ofs_read(tdb, TDB1_RECOVERY_HEAD, &recovery_head) == -1) {
		tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
			   "tdb1_wipe_all: failed to read recovery head");
		goto failed;
	}

	if (recovery_head != 0) {
		struct tdb1_record rec;
		if (tdb->tdb1.io->tdb1_read(tdb, recovery_head, &rec, sizeof(rec), TDB1_DOCONV()) == -1) {
			tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
				   "tdb1_wipe_all: failed to read recovery record");
			/* go through the common exit so the allrecord lock
			   is dropped; returning directly would leak it */
			goto failed;
		}
		/* the recovery area spans its header plus rec_len bytes */
		recovery_size = rec.rec_len + sizeof(rec);
	}

	/* wipe the hashes */
	for (i=0;i<tdb->tdb1.header.hash_size;i++) {
		if (tdb1_ofs_write(tdb, TDB1_HASH_TOP(i), &offset) == -1) {
			tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
				   "tdb1_wipe_all: failed to write hash %d", i);
			goto failed;
		}
	}

	/* wipe the freelist */
	if (tdb1_ofs_write(tdb, TDB1_FREELIST_TOP, &offset) == -1) {
		tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
			   "tdb1_wipe_all: failed to write freelist");
		goto failed;
	}

	/* add all the rest of the file to the freelist, possibly leaving a gap
	   for the recovery area */
	if (recovery_size == 0) {
		/* the simple case - the whole file can be used as a freelist */
		data_len = (tdb->file->map_size - TDB1_DATA_START(tdb->tdb1.header.hash_size));
		if (tdb1_free_region(tdb, TDB1_DATA_START(tdb->tdb1.header.hash_size), data_len) != 0) {
			goto failed;
		}
	} else {
		/* we need to add two freelist entries - one on either
		   side of the recovery area

		   Note that we cannot shift the recovery area during
		   this operation. Only the transaction.c code may
		   move the recovery area or we risk subtle data
		   corruption
		*/
		data_len = (recovery_head - TDB1_DATA_START(tdb->tdb1.header.hash_size));
		if (tdb1_free_region(tdb, TDB1_DATA_START(tdb->tdb1.header.hash_size), data_len) != 0) {
			goto failed;
		}
		/* and the 2nd free list entry after the recovery area - if any */
		data_len = tdb->file->map_size - (recovery_head+recovery_size);
		if (tdb1_free_region(tdb, recovery_head+recovery_size, data_len) != 0) {
			goto failed;
		}
	}

	tdb1_increment_seqnum_nonblock(tdb);
	tdb_unlockall(tdb);
	return 0;

failed:
	tdb_unlockall(tdb);
	return -1;
}
Beispiel #3
0
/*
 * Build a human-readable summary of the database layout.
 *
 * Walks every record from the start of the data area to the end of the
 * mapped file, tallying key, data and padding sizes for used records, the
 * sizes of free and dead records, runs of adjacent free records, and the
 * length of each hash chain.  The result is formatted with SUMMARY_FORMAT1
 * into a buffer obtained from malloc().
 *
 * Returns the buffer, or NULL if locking, the recovery-area lookup, a
 * record read or the allocation fails, or if a record with an unexpected
 * magic is found (that case also sets tdb->last_error).
 */
char *tdb1_summary(struct tdb_context *tdb)
{
	tdb1_off_t off, rec_off;
	struct tally freet, keys, data, dead, extra, hash, uncoal;
	struct tdb1_record rec;
	char *ret = NULL;
	bool locked;
	size_t len, unc = 0;
	struct tdb1_record recovery;

	/* We may have a write lock already, so don't lock. */
	if (tdb->file->allrecord_lock.count != 0) {
		locked = false;
	} else {
		if (tdb_lockall_read(tdb) != TDB_SUCCESS)
			return NULL;
		locked = true;
	}

	if (tdb1_recovery_area(tdb, tdb->tdb1.io, &rec_off, &recovery) != 0) {
		goto unlock;
	}

	tally1_init(&freet);
	tally1_init(&keys);
	tally1_init(&data);
	tally1_init(&dead);
	tally1_init(&extra);
	tally1_init(&hash);
	tally1_init(&uncoal);

	/* Step from record header to record header; each record occupies its
	   header plus rec_len bytes. */
	for (off = TDB1_DATA_START(tdb->tdb1.header.hash_size);
	     off < tdb->file->map_size - 1;
	     off += sizeof(rec) + rec.rec_len) {
		if (tdb->tdb1.io->tdb1_read(tdb, off, &rec, sizeof(rec),
					   TDB1_DOCONV()) == -1)
			goto unlock;
		switch (rec.magic) {
		case TDB1_MAGIC:
			tally1_add(&keys, rec.key_len);
			tally1_add(&data, rec.data_len);
			/* whatever rec_len holds beyond key and data */
			tally1_add(&extra, rec.rec_len - (rec.key_len
							 + rec.data_len));
			/* a used record ends a run of free records; a run of
			   more than one is recorded as uncoalesced */
			if (unc > 1)
				tally1_add(&uncoal, unc - 1);
			unc = 0;
			break;
		case TDB1_FREE_MAGIC:
			tally1_add(&freet, rec.rec_len);
			unc++;
			break;
		/* If we crash after ftruncate, we can get zeroes or fill. */
		case TDB1_RECOVERY_INVALID_MAGIC:
		case 0x42424242:
			unc++;
			/* If it's a valid recovery, we can trust rec_len. */
			if (off != rec_off) {
				rec.rec_len = tdb1_dead_space(tdb, off)
					- sizeof(rec);
			}
			/* Fall through */
		case TDB1_DEAD_MAGIC:
			tally1_add(&dead, rec.rec_len);
			break;
		default:
			tdb->last_error = tdb_logerr(tdb, TDB_ERR_CORRUPT,
						TDB_LOG_ERROR,
						"Unexpected record magic 0x%x"
						" at offset %u",
						rec.magic, off);
			goto unlock;
		}
	}
	/* account for a run of free records that reaches the end of the file */
	if (unc > 1)
		tally1_add(&uncoal, unc - 1);

	for (off = 0; off < tdb->tdb1.header.hash_size; off++)
		tally1_add(&hash, get_hash_length(tdb, off));

	/* 20 is max length of a %zu. */
	len = strlen(SUMMARY_FORMAT1) + 35*20 + 1;
	ret = (char *)malloc(len);
	if (!ret)
		goto unlock;

	snprintf(ret, len, SUMMARY_FORMAT1,
		 (tdb1_len_t)tdb->file->map_size, keys.total+data.total,
		 keys.num,
		 keys.min, tally1_mean(&keys), keys.max,
		 data.min, tally1_mean(&data), data.max,
		 extra.min, tally1_mean(&extra), extra.max,
		 dead.num,
		 dead.min, tally1_mean(&dead), dead.max,
		 freet.num,
		 freet.min, tally1_mean(&freet), freet.max,
		 hash.num,
		 hash.min, tally1_mean(&hash), hash.max,
		 uncoal.total,
		 uncoal.min, tally1_mean(&uncoal), uncoal.max,
		 keys.total * 100.0 / tdb->file->map_size,
		 data.total * 100.0 / tdb->file->map_size,
		 extra.total * 100.0 / tdb->file->map_size,
		 freet.total * 100.0 / tdb->file->map_size,
		 dead.total * 100.0 / tdb->file->map_size,
		 (keys.num + freet.num + dead.num)
		 * (sizeof(struct tdb1_record) + sizeof(uint32_t))
		 * 100.0 / tdb->file->map_size,
		 tdb->tdb1.header.hash_size * sizeof(tdb1_off_t)
		 * 100.0 / (tdb1_len_t)tdb->file->map_size);

unlock:
	/* only release the lock if this call took it */
	if (locked) {
		tdb_unlockall_read(tdb);
	}
	return ret;
}
Beispiel #4
0
/*
 * Add an element into the freelist. Merge adjacent records if
 * necessary.
 *
 * `offset` is the file offset of the record header and `rec` is its
 * in-memory header.  If the record immediately to the left is free, this
 * record is absorbed into it; otherwise the record is marked free and
 * prepended to the freelist.  Failures while inspecting the left
 * neighbour are not fatal: the record is simply prepended instead.
 *
 * Returns 0 on success and -1 on failure.  The lock taken at entry is
 * released on every path.
 */
int tdb1_free(struct tdb_context *tdb, tdb1_off_t offset, struct tdb1_record *rec)
{
	/* Allocation and tailer lock */
	if (tdb1_lock(tdb, -1, F_WRLCK) != 0)
		return -1;

	/* set an initial tailer, so if we fail we don't leave a bogus record */
	if (update_tailer(tdb, offset, rec) != 0) {
		tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
			   "tdb1_free: update_tailer failed!\n");
		goto fail;
	}

	tdb->stats.alloc_coalesce_tried++;
	/* Look left */
	if (offset - sizeof(tdb1_off_t) > TDB1_DATA_START(tdb->tdb1.header.hash_size)) {
		/* the word just before this record is the left neighbour's tailer */
		tdb1_off_t left = offset - sizeof(tdb1_off_t);
		struct tdb1_record l;
		tdb1_off_t leftsize;

		/* Read in tailer and jump back to header */
		if (tdb1_ofs_read(tdb, left, &leftsize) == -1) {
			tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
				   "tdb1_free: left offset read failed at %u", left);
			goto update;
		}

		/* it could be uninitialised data */
		if (leftsize == 0 || leftsize == TDB1_PAD_U32) {
			goto update;
		}

		left = offset - leftsize;

		/* reject a tailer that would place the left header before
		   the start of the data area */
		if (leftsize > offset ||
		    left < TDB1_DATA_START(tdb->tdb1.header.hash_size)) {
			goto update;
		}

		/* Now read in the left record */
		if (tdb->tdb1.io->tdb1_read(tdb, left, &l, sizeof(l), TDB1_DOCONV()) == -1) {
			tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
				   "tdb1_free: left read failed at %u (%u)", left, leftsize);
			goto update;
		}

		/* If it's free, expand to include it. */
		if (l.magic == TDB1_FREE_MAGIC) {
			/* we now merge the new record into the left record, rather than the other
			   way around. This makes the operation O(1) instead of O(n). This change
			   prevents traverse from being O(n^2) after a lot of deletes */
			l.rec_len += sizeof(*rec) + rec->rec_len;
			if (tdb1_rec_write(tdb, left, &l) == -1) {
				tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
					   "tdb1_free: update_left failed at %u", left);
				goto fail;
			}
			if (update_tailer(tdb, left, &l) == -1) {
				tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
					   "tdb1_free: update_tailer failed at %u", offset);
				goto fail;
			}
			tdb->stats.alloc_coalesce_succeeded++;
			tdb->stats.alloc_coalesce_num_merged++;
			tdb->stats.frees++;
			tdb1_unlock(tdb, -1, F_WRLCK);
			return 0;
		}
	}

update:

	/* Now, prepend to free list */
	rec->magic = TDB1_FREE_MAGIC;

	/* link to the old freelist head, write this record, then make it
	   the new head */
	if (tdb1_ofs_read(tdb, TDB1_FREELIST_TOP, &rec->next) == -1 ||
	    tdb1_rec_write(tdb, offset, rec) == -1 ||
	    tdb1_ofs_write(tdb, TDB1_FREELIST_TOP, &offset) == -1) {
		tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
			   "tdb1_free record write failed at offset=%u",
			   offset);
		goto fail;
	}

	/* And we're done. */
	tdb->stats.frees++;
	tdb1_unlock(tdb, -1, F_WRLCK);
	return 0;

 fail:
	tdb1_unlock(tdb, -1, F_WRLCK);
	return -1;
}