Beispiel #1
0
/*
  Fetch the tdb sequence number. Only meaningful when the writers opened
  the database with TDB1_SEQNUM set. The counter wraps quickly, so treat
  it strictly as a "has anything changed?" probe, never as a count of
  modifications; keep a real tdb record if you need a counter.

  The point of the sequence number is to offer a very cheap test for a
  possible tdb change.
*/
int tdb1_get_seqnum(struct tdb_context *tdb)
{
	tdb1_off_t current = 0;

	/* On read failure the initial 0 is returned; errors are not
	   reported through this interface. */
	tdb1_ofs_read(tdb, TDB1_SEQNUM_OFS, &current);
	return current;
}
Beispiel #2
0
/*
 * See if we have a dead record around with enough space
 *
 * Walks the hash chain for 'hash' and returns the offset of the first
 * DEAD record whose rec_len can hold 'length' bytes, filling *r with
 * its header. Returns 0 if no suitable record exists or on read error.
 */
static tdb1_off_t tdb1_find_dead(struct tdb_context *tdb, uint32_t hash,
			       struct tdb1_record *r, tdb1_len_t length)
{
	tdb1_off_t rec_ptr;

	/* read in the hash top */
	if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1)
		return 0;

	/* keep looking until we find the right record */
	while (rec_ptr) {
		if (tdb1_rec_read(tdb, rec_ptr, r) == -1)
			return 0;

		if (TDB1_DEAD(r) && r->rec_len >= length) {
			/*
			 * First fit for simple coding, TODO: change to best
			 * fit
			 */
			return rec_ptr;
		}

		/* detect tight infinite loop on a corrupt chain; the
		   same self-reference guard exists in tdb1_find() */
		if (rec_ptr == r->next) {
			tdb->last_error = tdb_logerr(tdb, TDB_ERR_CORRUPT,
						TDB_LOG_ERROR,
						"tdb1_find_dead: loop detected.");
			return 0;
		}
		rec_ptr = r->next;
	}
	return 0;
}
Beispiel #3
0
/* actually delete an entry in the database given the offset */
int tdb1_do_delete(struct tdb_context *tdb, tdb1_off_t rec_ptr, struct tdb1_record *rec)
{
	tdb1_off_t last_ptr, i;
	struct tdb1_record lastrec;

	/* refuse on read-only databases or inside a read-only traverse */
	if ((tdb->flags & TDB_RDONLY) || tdb->tdb1.traverse_read) return -1;

	if (((tdb->tdb1.traverse_write != 0) && (!TDB1_DEAD(rec))) ||
	    tdb1_write_lock_record(tdb, rec_ptr) == -1) {
		/* Someone traversing here: mark it as dead */
		rec->magic = TDB1_DEAD_MAGIC;
		return tdb1_rec_write(tdb, rec_ptr, rec);
	}
	/* the write lock above was only a probe for traversers; drop it
	   before actually unlinking the record */
	if (tdb1_write_unlock_record(tdb, rec_ptr) != 0)
		return -1;

	/* find previous record in hash chain */
	if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(rec->full_hash), &i) == -1)
		return -1;
	for (last_ptr = 0; i != rec_ptr; last_ptr = i, i = lastrec.next)
		if (tdb1_rec_read(tdb, i, &lastrec) == -1)
			return -1;

	/* unlink it: next ptr is at start of record. */
	if (last_ptr == 0)
		/* record is head of its chain: the "previous next ptr"
		   is the hash-top slot itself */
		last_ptr = TDB1_HASH_TOP(rec->full_hash);
	if (tdb1_ofs_write(tdb, last_ptr, &rec->next) == -1)
		return -1;

	/* recover the space */
	if (tdb1_free(tdb, rec_ptr, rec) == -1)
		return -1;
	return 0;
}
Beispiel #4
0
/*
 * Purge all DEAD records from a hash chain
 *
 * Takes the freelist lock, deletes every TDB1_DEAD_MAGIC record in the
 * chain for 'hash', and releases the lock. Returns 0 on success, -1 on
 * any lock/read/delete failure.
 */
static int tdb1_purge_dead(struct tdb_context *tdb, uint32_t hash)
{
	struct tdb1_record rec;
	tdb1_off_t cur;
	int ret = -1;

	if (tdb1_lock(tdb, -1, F_WRLCK) == -1) {
		return -1;
	}

	/* fetch the head of the chain */
	if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &cur) == -1) {
		goto done;
	}

	while (cur != 0) {
		tdb1_off_t next;

		if (tdb1_rec_read(tdb, cur, &rec) == -1) {
			goto done;
		}

		/* remember the successor before the record is deleted */
		next = rec.next;

		if (rec.magic == TDB1_DEAD_MAGIC
		    && tdb1_do_delete(tdb, cur, &rec) == -1) {
			goto done;
		}
		cur = next;
	}
	ret = 0;
 done:
	tdb1_unlock(tdb, -1, F_WRLCK);
	return ret;
}
Beispiel #5
0
/* Returns 0 on fail; last_error will be TDB_ERR_NOEXIST if it simply
 * wasn't there, otherwise a real error.
 * On success, return offset of record, and fills in rec */
static tdb1_off_t tdb1_find(struct tdb_context *tdb, TDB_DATA key, uint32_t hash,
			struct tdb1_record *r)
{
	tdb1_off_t rec_ptr;

	/* read in the hash top */
	if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1)
		return 0;

	/* keep looking until we find the right record */
	while (rec_ptr) {
		if (tdb1_rec_read(tdb, rec_ptr, r) == -1)
			return 0;

		/* each mismatch is counted in a stats bucket so chain
		   quality can be diagnosed; cheapest checks come first */
		tdb->stats.compares++;
		if (TDB1_DEAD(r)) {
			tdb->stats.compare_wrong_bucket++;
		} else if (key.dsize != r->key_len) {
			tdb->stats.compare_wrong_keylen++;
		} else if (hash != r->full_hash) {
			tdb->stats.compare_wrong_rechash++;
		} else {
			/* lengths and hash match: compare the stored key
			   bytes (located just past the record header) */
			enum TDB_ERROR ecode;
			bool matches;
			ecode = tdb1_parse_data(tdb, key, rec_ptr + sizeof(*r),
						r->key_len, tdb1_key_compare,
						&matches);

			if (ecode != TDB_SUCCESS) {
				tdb->last_error = ecode;
				return 0;
			}

			if (!matches) {
				tdb->stats.compare_wrong_keycmp++;
			} else {
				return rec_ptr;
			}
		}
		/* detect tight infinite loop */
		if (rec_ptr == r->next) {
			tdb->last_error = tdb_logerr(tdb, TDB_ERR_CORRUPT,
						TDB_LOG_ERROR,
						"tdb1_find: loop detected.");
			return 0;
		}
		rec_ptr = r->next;
	}
	/* chain exhausted without a match */
	tdb->last_error = TDB_ERR_NOEXIST;
	return 0;
}
Beispiel #6
0
/*
  Bump the tdb sequence number without taking any lock. Does nothing
  unless the tdb was opened with the TDB_SEQNUM flag.
*/
void tdb1_increment_seqnum_nonblock(struct tdb_context *tdb)
{
	tdb1_off_t current = 0;

	if (!(tdb->flags & TDB_SEQNUM)) {
		return;
	}

	/* Read-increment-write; errors from either I/O call are
	   deliberately ignored since there is no sane way to handle
	   them here. */
	tdb1_ofs_read(tdb, TDB1_SEQNUM_OFS, &current);
	current++;
	tdb1_ofs_write(tdb, TDB1_SEQNUM_OFS, &current);
}
Beispiel #7
0
static size_t get_hash_length(struct tdb_context *tdb, unsigned int i)
{
	tdb1_off_t rec_ptr;
	size_t count = 0;

	if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(i), &rec_ptr) == -1)
		return 0;

	/* keep looking until we find the right record */
	while (rec_ptr) {
		struct tdb1_record r;
		++count;
		if (tdb1_rec_read(tdb, rec_ptr, &r) == -1)
			return 0;
		rec_ptr = r.next;
	}
	return count;
}
Beispiel #8
0
/* Count the TDB1_DEAD_MAGIC records in hash chain 'hash'; returns 0 on
 * any read error as well as for a chain with no dead records. */
static int tdb1_count_dead(struct tdb_context *tdb, uint32_t hash)
{
	struct tdb1_record rec;
	tdb1_off_t cur;
	int ndead = 0;

	/* fetch the head of the chain */
	if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &cur) == -1)
		return 0;

	for (; cur != 0; cur = rec.next) {
		if (tdb1_rec_read(tdb, cur, &rec) == -1)
			return 0;

		if (rec.magic == TDB1_DEAD_MAGIC)
			ndead++;
	}
	return ndead;
}
Beispiel #9
0
/*
  wipe the entire database, deleting all records. This can be done
  very fast by using a allrecord lock. The entire data portion of the
  file becomes a single entry in the freelist.

  This code carefully steps around the recovery area, leaving it alone

  Returns 0 on success, -1 on failure (with the allrecord lock released
  on every path).
 */
int tdb1_wipe_all(struct tdb_context *tdb)
{
	int i;
	tdb1_off_t offset = 0;
	ssize_t data_len;
	tdb1_off_t recovery_head;
	tdb1_len_t recovery_size = 0;

	if (tdb_lockall(tdb) != TDB_SUCCESS) {
		return -1;
	}


	/* see if the tdb has a recovery area, and remember its size
	   if so. We don't want to lose this as otherwise each
	   tdb1_wipe_all() in a transaction will increase the size of
	   the tdb by the size of the recovery area */
	if (tdb1_ofs_read(tdb, TDB1_RECOVERY_HEAD, &recovery_head) == -1) {
		tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
			   "tdb1_wipe_all: failed to read recovery head");
		goto failed;
	}

	if (recovery_head != 0) {
		struct tdb1_record rec;
		if (tdb->tdb1.io->tdb1_read(tdb, recovery_head, &rec, sizeof(rec), TDB1_DOCONV()) == -1) {
			tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
				   "tdb1_wipe_all: failed to read recovery record");
			/* was "return -1" here, which leaked the
			   allrecord lock taken above */
			goto failed;
		}
		recovery_size = rec.rec_len + sizeof(rec);
	}

	/* wipe the hashes */
	for (i=0;i<tdb->tdb1.header.hash_size;i++) {
		if (tdb1_ofs_write(tdb, TDB1_HASH_TOP(i), &offset) == -1) {
			tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
				   "tdb1_wipe_all: failed to write hash %d", i);
			goto failed;
		}
	}

	/* wipe the freelist */
	if (tdb1_ofs_write(tdb, TDB1_FREELIST_TOP, &offset) == -1) {
		tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
			   "tdb1_wipe_all: failed to write freelist");
		goto failed;
	}

	/* add all the rest of the file to the freelist, possibly leaving a gap
	   for the recovery area */
	if (recovery_size == 0) {
		/* the simple case - the whole file can be used as a freelist */
		data_len = (tdb->file->map_size - TDB1_DATA_START(tdb->tdb1.header.hash_size));
		if (tdb1_free_region(tdb, TDB1_DATA_START(tdb->tdb1.header.hash_size), data_len) != 0) {
			goto failed;
		}
	} else {
		/* we need to add two freelist entries - one on either
		   side of the recovery area

		   Note that we cannot shift the recovery area during
		   this operation. Only the transaction.c code may
		   move the recovery area or we risk subtle data
		   corruption
		*/
		data_len = (recovery_head - TDB1_DATA_START(tdb->tdb1.header.hash_size));
		if (tdb1_free_region(tdb, TDB1_DATA_START(tdb->tdb1.header.hash_size), data_len) != 0) {
			goto failed;
		}
		/* and the 2nd free list entry after the recovery area - if any */
		data_len = tdb->file->map_size - (recovery_head+recovery_size);
		if (tdb1_free_region(tdb, recovery_head+recovery_size, data_len) != 0) {
			goto failed;
		}
	}

	tdb1_increment_seqnum_nonblock(tdb);
	tdb_unlockall(tdb);
	return 0;

failed:
	tdb_unlockall(tdb);
	return -1;
}
Beispiel #10
0
/* Store key/dbuf under 'hash'. flag is TDB_INSERT (fail if key exists),
 * TDB_MODIFY (fail if key missing) or TDB_REPLACE. Returns 0 on
 * success, -1 on failure with tdb->last_error set; the sequence number
 * is bumped only on success. */
static int _tdb1_store(struct tdb_context *tdb, TDB_DATA key,
		       TDB_DATA dbuf, int flag, uint32_t hash)
{
	struct tdb1_record rec;
	tdb1_off_t rec_ptr;
	char *p = NULL;
	int ret = -1;

	/* check for it existing, on insert. */
	if (flag == TDB_INSERT) {
		if (tdb1_exists_hash(tdb, key, hash)) {
			tdb->last_error = TDB_ERR_EXISTS;
			goto fail;
		}
		/* exists_hash returned false: distinguish "not there"
		   from a real lookup error */
		if (tdb->last_error != TDB_ERR_NOEXIST) {
			goto fail;
		}
	} else {
		/* first try in-place update, on modify or replace. */
		if (tdb1_update_hash(tdb, key, hash, dbuf) == 0) {
			goto done;
		}
		if (tdb->last_error != TDB_SUCCESS) {
			if (tdb->last_error != TDB_ERR_NOEXIST) {
				goto fail;
			}
			if (flag == TDB_MODIFY) {
				/* if the record doesn't exist and we are in TDB1_MODIFY mode then
				   we should fail the store */
				goto fail;
			}
		}
	}
	/* reset the error code potentially set by the tdb1_update() */
	tdb->last_error = TDB_SUCCESS;

	/* delete any existing record - if it doesn't exist we don't
           care.  Doing this first reduces fragmentation, and avoids
           coalescing with `allocated' block before it's updated. */
	if (flag != TDB_INSERT)
		tdb1_delete_hash(tdb, key, hash);

	/* Copy key+value *before* allocating free space in case malloc
	   fails and we are left with a dead spot in the tdb. */

	if (!(p = (char *)malloc(key.dsize + dbuf.dsize))) {
		tdb->last_error = tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
					     "tdb1_store: out of memory"
					     " allocating copy");
		goto fail;
	}

	memcpy(p, key.dsize ? key.dptr : key.dptr, key.dsize);
	if (dbuf.dsize)
		memcpy(p+key.dsize, dbuf.dptr, dbuf.dsize);

	if (tdb->tdb1.max_dead_records != 0) {
		/*
		 * Allow for some dead records per hash chain, look if we can
		 * find one that can hold the new record. We need enough space
		 * for key, data and tailer. If we find one, we don't have to
		 * consult the central freelist.
		 */
		rec_ptr = tdb1_find_dead(
			tdb, hash, &rec,
			key.dsize + dbuf.dsize + sizeof(tdb1_off_t));

		if (rec_ptr != 0) {
			/* reuse the dead record's slot in place; its
			   chain position is already correct */
			rec.key_len = key.dsize;
			rec.data_len = dbuf.dsize;
			rec.full_hash = hash;
			rec.magic = TDB1_MAGIC;
			if (tdb1_rec_write(tdb, rec_ptr, &rec) == -1
			    || tdb->tdb1.io->tdb1_write(
				    tdb, rec_ptr + sizeof(rec),
				    p, key.dsize + dbuf.dsize) == -1) {
				goto fail;
			}
			goto done;
		}
	}

	/*
	 * We have to allocate some space from the freelist, so this means we
	 * have to lock it. Use the chance to purge all the DEAD records from
	 * the hash chain under the freelist lock.
	 */

	if (tdb1_lock(tdb, -1, F_WRLCK) == -1) {
		goto fail;
	}

	if ((tdb->tdb1.max_dead_records != 0)
	    && (tdb1_purge_dead(tdb, hash) == -1)) {
		tdb1_unlock(tdb, -1, F_WRLCK);
		goto fail;
	}

	/* we have to allocate some space */
	rec_ptr = tdb1_allocate(tdb, key.dsize + dbuf.dsize, &rec);

	tdb1_unlock(tdb, -1, F_WRLCK);

	if (rec_ptr == 0) {
		goto fail;
	}

	/* Read hash top into next ptr */
	if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec.next) == -1)
		goto fail;

	rec.key_len = key.dsize;
	rec.data_len = dbuf.dsize;
	rec.full_hash = hash;
	rec.magic = TDB1_MAGIC;

	/* write out and point the top of the hash chain at it */
	if (tdb1_rec_write(tdb, rec_ptr, &rec) == -1
	    || tdb->tdb1.io->tdb1_write(tdb, rec_ptr+sizeof(rec), p, key.dsize+dbuf.dsize)==-1
	    || tdb1_ofs_write(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1) {
		/* Need to tdb1_unallocate() here */
		goto fail;
	}

 done:
	ret = 0;
 fail:
	/* both success and failure fall through here; the seqnum is
	   only bumped when the store actually happened */
	if (ret == 0) {
		tdb1_increment_seqnum(tdb);
	}

	SAFE_FREE(p);
	return ret;
}
Beispiel #11
0
/* Add an element into the freelist. Merge adjacent records if
   necessary. Returns 0 on success, -1 on failure; the freelist lock is
   released on every path. */
int tdb1_free(struct tdb_context *tdb, tdb1_off_t offset, struct tdb1_record *rec)
{
	/* Allocation and tailer lock */
	if (tdb1_lock(tdb, -1, F_WRLCK) != 0)
		return -1;

	/* set an initial tailer, so if we fail we don't leave a bogus record */
	if (update_tailer(tdb, offset, rec) != 0) {
		tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
			   "tdb_free: update_tailer failed!\n");
		goto fail;
	}

	tdb->stats.alloc_coalesce_tried++;
	/* Look left */
	if (offset - sizeof(tdb1_off_t) > TDB1_DATA_START(tdb->tdb1.header.hash_size)) {
		tdb1_off_t left = offset - sizeof(tdb1_off_t);
		struct tdb1_record l;
		tdb1_off_t leftsize;

		/* Read in tailer and jump back to header */
		if (tdb1_ofs_read(tdb, left, &leftsize) == -1) {
			tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
				   "tdb1_free: left offset read failed at %u", left);
			/* non-fatal: just skip coalescing */
			goto update;
		}

		/* it could be uninitialised data */
		if (leftsize == 0 || leftsize == TDB1_PAD_U32) {
			goto update;
		}

		left = offset - leftsize;

		/* sanity-check the computed left offset before reading */
		if (leftsize > offset ||
		    left < TDB1_DATA_START(tdb->tdb1.header.hash_size)) {
			goto update;
		}

		/* Now read in the left record */
		if (tdb->tdb1.io->tdb1_read(tdb, left, &l, sizeof(l), TDB1_DOCONV()) == -1) {
			tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
				   "tdb1_free: left read failed at %u (%u)", left, leftsize);
			goto update;
		}

		/* If it's free, expand to include it. */
		if (l.magic == TDB1_FREE_MAGIC) {
			/* we now merge the new record into the left record, rather than the other
			   way around. This makes the operation O(1) instead of O(n). This change
			   prevents traverse from being O(n^2) after a lot of deletes */
			l.rec_len += sizeof(*rec) + rec->rec_len;
			if (tdb1_rec_write(tdb, left, &l) == -1) {
				tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
					   "tdb1_free: update_left failed at %u", left);
				goto fail;
			}
			if (update_tailer(tdb, left, &l) == -1) {
				tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
					   "tdb1_free: update_tailer failed at %u", offset);
				goto fail;
			}
			tdb->stats.alloc_coalesce_succeeded++;
			tdb->stats.alloc_coalesce_num_merged++;
			tdb->stats.frees++;
			tdb1_unlock(tdb, -1, F_WRLCK);
			return 0;
		}
	}

update:

	/* Now, prepend to free list */
	rec->magic = TDB1_FREE_MAGIC;

	if (tdb1_ofs_read(tdb, TDB1_FREELIST_TOP, &rec->next) == -1 ||
	    tdb1_rec_write(tdb, offset, rec) == -1 ||
	    tdb1_ofs_write(tdb, TDB1_FREELIST_TOP, &offset) == -1) {
		tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
			   "tdb1_free record write failed at offset=%d",
			   offset);
		goto fail;
	}

	/* And we're done. */
	tdb->stats.frees++;
	tdb1_unlock(tdb, -1, F_WRLCK);
	return 0;

 fail:
	tdb1_unlock(tdb, -1, F_WRLCK);
	return -1;
}
Beispiel #12
0
/* allocate some space from the free list. The offset returned points
   to a unconnected tdb1_record within the database with room for at
   least length bytes of total data

   0 is returned if the space could not be allocated
 */
tdb1_off_t tdb1_allocate(struct tdb_context *tdb, tdb1_len_t length, struct tdb1_record *rec)
{
	tdb1_off_t rec_ptr, last_ptr, newrec_ptr;
	/* best candidate found so far, plus its predecessor for unlinking */
	struct {
		tdb1_off_t rec_ptr, last_ptr;
		tdb1_len_t rec_len;
	} bestfit;
	float multiplier = 1.0;

	if (tdb1_lock(tdb, -1, F_WRLCK) == -1)
		return 0;

	/* over-allocate to reduce fragmentation */
	length *= 1.25;

	/* Extra bytes required for tailer */
	length += sizeof(tdb1_off_t);
	length = TDB1_ALIGN(length, TDB1_ALIGNMENT);

 again:
	last_ptr = TDB1_FREELIST_TOP;

	/* read in the freelist top */
	if (tdb1_ofs_read(tdb, TDB1_FREELIST_TOP, &rec_ptr) == -1)
		goto fail;

	bestfit.rec_ptr = 0;
	bestfit.last_ptr = 0;
	bestfit.rec_len = 0;

	/*
	   this is a best fit allocation strategy. Originally we used
	   a first fit strategy, but it suffered from massive fragmentation
	   issues when faced with a slowly increasing record size.
	 */
	while (rec_ptr) {
		if (tdb1_rec_free_read(tdb, rec_ptr, rec) == -1) {
			goto fail;
		}

		if (rec->rec_len >= length) {
			if (bestfit.rec_ptr == 0 ||
			    rec->rec_len < bestfit.rec_len) {
				bestfit.rec_len = rec->rec_len;
				bestfit.rec_ptr = rec_ptr;
				bestfit.last_ptr = last_ptr;
			}
		}

		/* move to the next record */
		last_ptr = rec_ptr;
		rec_ptr = rec->next;

		/* if we've found a record that is big enough, then
		   stop searching if its also not too big. The
		   definition of 'too big' changes as we scan
		   through */
		if (bestfit.rec_len > 0 &&
		    bestfit.rec_len < length * multiplier) {
			break;
		}

		/* this multiplier means we only extremely rarely
		   search more than 50 or so records. At 50 records we
		   accept records up to 11 times larger than what we
		   want */
		multiplier *= 1.05;
	}

	if (bestfit.rec_ptr != 0) {
		/* re-read the winner: *rec currently holds whatever
		   record the scan stopped on, not necessarily the best */
		if (tdb1_rec_free_read(tdb, bestfit.rec_ptr, rec) == -1) {
			goto fail;
		}

		/* carve the requested space out of the chosen free
		   record and unlink/relink as needed */
		newrec_ptr = tdb1_allocate_ofs(tdb, length, bestfit.rec_ptr,
					      rec, bestfit.last_ptr);
		tdb1_unlock(tdb, -1, F_WRLCK);
		return newrec_ptr;
	}

	/* we didn't find enough space. See if we can expand the
	   database and if we can then try again */
	if (tdb1_expand(tdb, length + sizeof(*rec)) == 0)
		goto again;
 fail:
	tdb1_unlock(tdb, -1, F_WRLCK);
	return 0;
}