Example #1
0
/*
 * Purge all DEAD records from a hash chain
 */
static int tdb1_purge_dead(struct tdb_context *tdb, uint32_t hash)
{
	int res = -1;
	struct tdb1_record rec;
	tdb1_off_t rec_ptr;

	if (tdb1_lock(tdb, -1, F_WRLCK) == -1) {
		return -1;
	}

	/* read in the hash top */
	if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1)
		goto fail;

	while (rec_ptr) {
		tdb1_off_t next;

		if (tdb1_rec_read(tdb, rec_ptr, &rec) == -1) {
			goto fail;
		}

		next = rec.next;

		if (rec.magic == TDB1_DEAD_MAGIC
		    && tdb1_do_delete(tdb, rec_ptr, &rec) == -1) {
			goto fail;
		}
		rec_ptr = next;
	}
	res = 0;
 fail:
	tdb1_unlock(tdb, -1, F_WRLCK);
	return res;
}
Example #2
0
/*
 * Like tdb1_find(), except that the chain lock of the requested type is
 * taken first and is still held on return when a record was found.
 *
 * Returns the offset reported by tdb1_find(), or 0 - with no lock held -
 * if either the lock or the lookup failed.
 */
tdb1_off_t tdb1_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, int locktype,
			   struct tdb1_record *rec)
{
	uint32_t found;

	if (tdb1_lock(tdb, TDB1_BUCKET(hash), locktype) == -1) {
		return 0;
	}

	found = tdb1_find(tdb, key, hash, rec);
	if (found == 0) {
		/* Nothing to hand back, so do not leave the chain locked. */
		tdb1_unlock(tdb, TDB1_BUCKET(hash), locktype);
	}
	return found;
}
Example #3
0
/*
 * Remove the record stored under key from the chain selected by hash.
 *
 * When max_dead_records is non-zero the record is only re-tagged with
 * TDB1_DEAD_MAGIC (after purging the chain if it already holds at least
 * max_dead_records dead entries); otherwise the record is deleted
 * outright.  The sequence number is bumped when the operation succeeds.
 *
 * Returns the result of the write/delete (0 on success), or -1 if the
 * chain could not be locked or the key was not found.
 */
static int tdb1_delete_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
{
	struct tdb1_record rec;
	tdb1_off_t off;
	int status;

	if (tdb->tdb1.max_dead_records == 0) {
		/* No dead records allowed: look up under lock and delete. */
		off = tdb1_find_lock_hash(tdb, key, hash, F_WRLCK, &rec);
		if (off == 0)
			return -1;

		status = tdb1_do_delete(tdb, off, &rec);
	} else {
		/*
		 * Some dead records are tolerated per hash chain, mainly
		 * for tdb's with a very high create/delete rate like
		 * locking.tdb.
		 */
		if (tdb1_lock(tdb, TDB1_BUCKET(hash), F_WRLCK) == -1)
			return -1;

		/*
		 * Keep the per-chain list of dead records from growing
		 * too large: once the limit is reached, drop them all.
		 */
		if (tdb1_count_dead(tdb, hash) >= tdb->tdb1.max_dead_records) {
			tdb1_purge_dead(tdb, hash);
		}

		off = tdb1_find(tdb, key, hash, &rec);
		if (off == 0) {
			tdb1_unlock(tdb, TDB1_BUCKET(hash), F_WRLCK);
			return -1;
		}

		/* Only flag the record as dead; it stays in the chain. */
		rec.magic = TDB1_DEAD_MAGIC;
		status = tdb1_rec_write(tdb, off, &rec);
	}

	if (status == 0)
		tdb1_increment_seqnum(tdb);

	/* The chain to release is derived from the found record's hash. */
	if (tdb1_unlock(tdb, TDB1_BUCKET(rec.full_hash), F_WRLCK) != 0)
		tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
			   "tdb1_delete: WARNING tdb1_unlock failed!");
	return status;
}
Example #4
0
/*
 * Append new_dbuf to the value stored under key.  If no existing value
 * could be fetched, a new record holding just new_dbuf is created.
 *
 * The hash chain for the key is write-locked for the whole
 * fetch / grow / store sequence.
 *
 * Returns the result of _tdb1_store() (0 on success).  Returns -1 if the
 * chain lock cannot be taken, or if the combined buffer cannot be
 * allocated (including when the combined length would overflow); in the
 * allocation case last_error is set to TDB_ERR_OOM.
 */
int tdb1_append(struct tdb_context *tdb, TDB_DATA key, TDB_DATA new_dbuf)
{
	uint32_t hash;
	TDB_DATA dbuf;
	int ret = -1;

	assert(tdb->flags & TDB_VERSION1);

	/* find which hash bucket it is in */
	hash = tdb_hash(tdb, key.dptr, key.dsize);
	if (tdb1_lock(tdb, TDB1_BUCKET(hash), F_WRLCK) == -1)
		return -1;

	dbuf = _tdb1_fetch(tdb, key);

	if (dbuf.dptr == NULL) {
		/*
		 * Nothing fetched: start from an empty buffer.  The length
		 * is reset so the copy below always lands at offset 0.
		 *
		 * malloc(0) may legitimately return NULL, which would be
		 * mistaken for out-of-memory below, so always ask for at
		 * least one byte (same reasoning as the realloc case).
		 */
		dbuf.dsize = 0;
		dbuf.dptr = (unsigned char *)malloc(
			new_dbuf.dsize != 0 ? new_dbuf.dsize : 1);
	} else {
		size_t new_len = dbuf.dsize + new_dbuf.dsize;
		unsigned char *new_dptr = NULL;

		/*
		 * Only grow the buffer if the combined length did not wrap
		 * around; otherwise fall through to the failure path rather
		 * than copying into an undersized allocation.
		 */
		if (new_len >= dbuf.dsize) {
			/* realloc '0' is special: don't do that. */
			if (new_len == 0)
				new_len = 1;
			new_dptr = (unsigned char *)realloc(dbuf.dptr, new_len);
		}
		if (new_dptr == NULL) {
			/* The old buffer is still ours on failure. */
			free(dbuf.dptr);
		}
		dbuf.dptr = new_dptr;
	}

	if (dbuf.dptr == NULL) {
		tdb->last_error = TDB_ERR_OOM;
		goto failed;
	}

	/* Skip the copy for an empty append: the source may be NULL. */
	if (new_dbuf.dsize != 0)
		memcpy(dbuf.dptr + dbuf.dsize, new_dbuf.dptr, new_dbuf.dsize);
	dbuf.dsize += new_dbuf.dsize;

	ret = _tdb1_store(tdb, key, dbuf, 0, hash);

failed:
	tdb1_unlock(tdb, TDB1_BUCKET(hash), F_WRLCK);
	SAFE_FREE(dbuf.dptr);
	return ret;
}
Example #5
0
/*
 * Store dbuf under key, replacing any existing element with the same
 * key.  flag is handed unchanged to _tdb1_store().
 *
 * The store is refused (TDB_ERR_RDONLY) when the handle carries
 * TDB_RDONLY or traverse_read is set.  Otherwise the key's hash chain is
 * write-locked around the actual store.
 *
 * Returns 0 on success, -1 on failure.
 */
int tdb1_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
{
	uint32_t chain_hash;
	int status;

	assert(tdb->flags & TDB_VERSION1);

	if ((tdb->flags & TDB_RDONLY) || tdb->tdb1.traverse_read) {
		tdb->last_error = tdb_logerr(tdb, TDB_ERR_RDONLY,
					     TDB_LOG_USE_ERROR,
					     "tdb_store: read-only tdb");
		return -1;
	}

	/* Work out which chain the key lives in and lock just that one. */
	chain_hash = tdb_hash(tdb, key.dptr, key.dsize);
	if (tdb1_lock(tdb, TDB1_BUCKET(chain_hash), F_WRLCK) != -1) {
		status = _tdb1_store(tdb, key, dbuf, flag, chain_hash);
		tdb1_unlock(tdb, TDB1_BUCKET(chain_hash), F_WRLCK);
		return status;
	}

	return -1;
}
Example #6
0
/*
 * Store dbuf under key in the chain selected by hash.
 *
 * This function does not take the hash chain lock itself; the callers
 * visible in this file (tdb1_store() and tdb1_append()) hold the chain
 * write lock around the call.
 *
 * flag selects the mode:
 *   - TDB_INSERT: fail with TDB_ERR_EXISTS if the key is already present.
 *   - TDB_MODIFY: fail if the key is not present.
 *   - anything else: try an in-place update first, otherwise delete any
 *     existing record and write a new one.
 *
 * Returns 0 on success (and bumps the sequence number), -1 on failure.
 */
static int _tdb1_store(struct tdb_context *tdb, TDB_DATA key,
		       TDB_DATA dbuf, int flag, uint32_t hash)
{
	struct tdb1_record rec;
	tdb1_off_t rec_ptr;
	char *p = NULL;
	int ret = -1;

	/* check for it existing, on insert. */
	if (flag == TDB_INSERT) {
		if (tdb1_exists_hash(tdb, key, hash)) {
			tdb->last_error = TDB_ERR_EXISTS;
			goto fail;
		}
		/* "Not found" is the only acceptable reason for a miss. */
		if (tdb->last_error != TDB_ERR_NOEXIST) {
			goto fail;
		}
	} else {
		/* first try in-place update, on modify or replace. */
		if (tdb1_update_hash(tdb, key, hash, dbuf) == 0) {
			goto done;
		}
		if (tdb->last_error != TDB_SUCCESS) {
			if (tdb->last_error != TDB_ERR_NOEXIST) {
				goto fail;
			}
			if (flag == TDB_MODIFY) {
				/* if the record doesn't exist and we are in TDB1_MODIFY mode then
				   we should fail the store */
				goto fail;
			}
		}
	}
	/* reset the error code potentially set by the tdb1_update() */
	tdb->last_error = TDB_SUCCESS;

	/* delete any existing record - if it doesn't exist we don't
           care.  Doing this first reduces fragmentation, and avoids
           coalescing with `allocated' block before it's updated. */
	if (flag != TDB_INSERT)
		tdb1_delete_hash(tdb, key, hash);

	/* Copy key+value *before* allocating free space in case malloc
	   fails and we are left with a dead spot in the tdb. */

	if (!(p = (char *)malloc(key.dsize + dbuf.dsize))) {
		tdb->last_error = tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
					     "tdb1_store: out of memory"
					     " allocating copy");
		goto fail;
	}

	/* p holds the key immediately followed by the data. */
	memcpy(p, key.dptr, key.dsize);
	if (dbuf.dsize)
		memcpy(p+key.dsize, dbuf.dptr, dbuf.dsize);

	if (tdb->tdb1.max_dead_records != 0) {
		/*
		 * Allow for some dead records per hash chain, look if we can
		 * find one that can hold the new record. We need enough space
		 * for key, data and tailer. If we find one, we don't have to
		 * consult the central freelist.
		 */
		rec_ptr = tdb1_find_dead(
			tdb, hash, &rec,
			key.dsize + dbuf.dsize + sizeof(tdb1_off_t));

		if (rec_ptr != 0) {
			/*
			 * Reuse the dead record in place: rewrite its header
			 * and payload.  The hash chain head is not touched on
			 * this path.
			 */
			rec.key_len = key.dsize;
			rec.data_len = dbuf.dsize;
			rec.full_hash = hash;
			rec.magic = TDB1_MAGIC;
			if (tdb1_rec_write(tdb, rec_ptr, &rec) == -1
			    || tdb->tdb1.io->tdb1_write(
				    tdb, rec_ptr + sizeof(rec),
				    p, key.dsize + dbuf.dsize) == -1) {
				goto fail;
			}
			goto done;
		}
	}

	/*
	 * We have to allocate some space from the freelist, so this means we
	 * have to lock it. Use the chance to purge all the DEAD records from
	 * the hash chain under the freelist lock.
	 */

	if (tdb1_lock(tdb, -1, F_WRLCK) == -1) {
		goto fail;
	}

	if ((tdb->tdb1.max_dead_records != 0)
	    && (tdb1_purge_dead(tdb, hash) == -1)) {
		tdb1_unlock(tdb, -1, F_WRLCK);
		goto fail;
	}

	/* we have to allocate some space */
	rec_ptr = tdb1_allocate(tdb, key.dsize + dbuf.dsize, &rec);

	/* Release the freelist lock before checking the result. */
	tdb1_unlock(tdb, -1, F_WRLCK);

	if (rec_ptr == 0) {
		goto fail;
	}

	/* Read hash top into next ptr */
	if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec.next) == -1)
		goto fail;

	rec.key_len = key.dsize;
	rec.data_len = dbuf.dsize;
	rec.full_hash = hash;
	rec.magic = TDB1_MAGIC;

	/* write out and point the top of the hash chain at it */
	if (tdb1_rec_write(tdb, rec_ptr, &rec) == -1
	    || tdb->tdb1.io->tdb1_write(tdb, rec_ptr+sizeof(rec), p, key.dsize+dbuf.dsize)==-1
	    || tdb1_ofs_write(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1) {
		/* Need to tdb1_unallocate() here */
		/* NOTE(review): known gap - the space obtained from
		   tdb1_allocate() is not handed back on this failure path. */
		goto fail;
	}

 done:
	ret = 0;
 fail:
	/* Both labels funnel through here; only success bumps the seqnum. */
	if (ret == 0) {
		tdb1_increment_seqnum(tdb);
	}

	SAFE_FREE(p);
	return ret;
}
Example #7
0
/*
 * Add the record at offset to the freelist, merging it into the record
 * immediately to its left when that record is itself free.
 *
 * rec is the in-memory header of the record being freed; rec->rec_len is
 * read, and rec->magic / rec->next are overwritten when the record is
 * prepended to the freelist.
 *
 * The whole operation runs under the write lock on list -1, which is
 * released on every return path.  Returns 0 on success and -1 if the lock
 * cannot be taken or a tailer/header write fails.  A failure to *read*
 * the left neighbour is logged but not fatal: the record is then simply
 * prepended without coalescing.
 */
int tdb1_free(struct tdb_context *tdb, tdb1_off_t offset, struct tdb1_record *rec)
{
	/* Allocation and tailer lock */
	if (tdb1_lock(tdb, -1, F_WRLCK) != 0)
		return -1;

	/* set an initial tailer, so if we fail we don't leave a bogus record */
	if (update_tailer(tdb, offset, rec) != 0) {
		tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
			   "tdb_free: update_tailer failed!\n");
		goto fail;
	}

	tdb->stats.alloc_coalesce_tried++;
	/* Look left: only if a tailer can sit between data start and us */
	if (offset - sizeof(tdb1_off_t) > TDB1_DATA_START(tdb->tdb1.header.hash_size)) {
		tdb1_off_t left = offset - sizeof(tdb1_off_t);
		struct tdb1_record l;
		tdb1_off_t leftsize;

		/* Read in tailer and jump back to header */
		if (tdb1_ofs_read(tdb, left, &leftsize) == -1) {
			tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
				   "tdb1_free: left offset read failed at %u", left);
			goto update;
		}

		/* it could be uninitialised data */
		if (leftsize == 0 || leftsize == TDB1_PAD_U32) {
			goto update;
		}

		left = offset - leftsize;

		/* Reject a tailer that points outside the data area
		   (the subtraction above may have wrapped). */
		if (leftsize > offset ||
		    left < TDB1_DATA_START(tdb->tdb1.header.hash_size)) {
			goto update;
		}

		/* Now read in the left record */
		if (tdb->tdb1.io->tdb1_read(tdb, left, &l, sizeof(l), TDB1_DOCONV()) == -1) {
			tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
				   "tdb1_free: left read failed at %u (%u)", left, leftsize);
			goto update;
		}

		/* If it's free, expand to include it. */
		if (l.magic == TDB1_FREE_MAGIC) {
			/* we now merge the new record into the left record, rather than the other
			   way around. This makes the operation O(1) instead of O(n). This change
			   prevents traverse from being O(n^2) after a lot of deletes */
			l.rec_len += sizeof(*rec) + rec->rec_len;
			if (tdb1_rec_write(tdb, left, &l) == -1) {
				tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
					   "tdb1_free: update_left failed at %u", left);
				goto fail;
			}
			if (update_tailer(tdb, left, &l) == -1) {
				tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
					   "tdb1_free: update_tailer failed at %u", offset);
				goto fail;
			}
			/* The left record is already on the freelist, so
			   nothing needs to be linked in. */
			tdb->stats.alloc_coalesce_succeeded++;
			tdb->stats.alloc_coalesce_num_merged++;
			tdb->stats.frees++;
			tdb1_unlock(tdb, -1, F_WRLCK);
			return 0;
		}
	}

update:

	/* Now, prepend to free list */
	rec->magic = TDB1_FREE_MAGIC;

	if (tdb1_ofs_read(tdb, TDB1_FREELIST_TOP, &rec->next) == -1 ||
	    tdb1_rec_write(tdb, offset, rec) == -1 ||
	    tdb1_ofs_write(tdb, TDB1_FREELIST_TOP, &offset) == -1) {
		/* offset is printed with %u, like every other offset
		   logged by this function. */
		tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
			   "tdb1_free record write failed at offset=%u",
			   offset);
		goto fail;
	}

	/* And we're done. */
	tdb->stats.frees++;
	tdb1_unlock(tdb, -1, F_WRLCK);
	return 0;

 fail:
	tdb1_unlock(tdb, -1, F_WRLCK);
	return -1;
}
Example #8
0
/* allocate some space from the free list. The offset returned points
   to an unconnected tdb1_record within the database with room for at
   least length bytes of total data

   rec is used as scratch space while the freelist is scanned and is
   re-read for the chosen record before it is handed to
   tdb1_allocate_ofs().

   The write lock on list -1 is taken for the duration of the call and
   released on every return path.

   0 is returned if the space could not be allocated
 */
tdb1_off_t tdb1_allocate(struct tdb_context *tdb, tdb1_len_t length, struct tdb1_record *rec)
{
	tdb1_off_t rec_ptr, last_ptr, newrec_ptr;
	/* Smallest record seen so far that is large enough, together with
	   the pointer that precedes it in the freelist. */
	struct {
		tdb1_off_t rec_ptr, last_ptr;
		tdb1_len_t rec_len;
	} bestfit;
	/* Initialised once only: it is not reset when the scan is retried
	   via the again label. */
	float multiplier = 1.0;

	if (tdb1_lock(tdb, -1, F_WRLCK) == -1)
		return 0;

	/* over-allocate to reduce fragmentation */
	length *= 1.25;

	/* Extra bytes required for tailer */
	length += sizeof(tdb1_off_t);
	length = TDB1_ALIGN(length, TDB1_ALIGNMENT);

 again:
	last_ptr = TDB1_FREELIST_TOP;

	/* read in the freelist top */
	if (tdb1_ofs_read(tdb, TDB1_FREELIST_TOP, &rec_ptr) == -1)
		goto fail;

	bestfit.rec_ptr = 0;
	bestfit.last_ptr = 0;
	bestfit.rec_len = 0;

	/*
	   this is a best fit allocation strategy. Originally we used
	   a first fit strategy, but it suffered from massive fragmentation
	   issues when faced with a slowly increasing record size.
	 */
	while (rec_ptr) {
		if (tdb1_rec_free_read(tdb, rec_ptr, rec) == -1) {
			goto fail;
		}

		/* Keep this record if it fits and is smaller than the
		   current best candidate. */
		if (rec->rec_len >= length) {
			if (bestfit.rec_ptr == 0 ||
			    rec->rec_len < bestfit.rec_len) {
				bestfit.rec_len = rec->rec_len;
				bestfit.rec_ptr = rec_ptr;
				bestfit.last_ptr = last_ptr;
			}
		}

		/* move to the next record */
		last_ptr = rec_ptr;
		rec_ptr = rec->next;

		/* if we've found a record that is big enough, then
		   stop searching if its also not too big. The
		   definition of 'too big' changes as we scan
		   through */
		if (bestfit.rec_len > 0 &&
		    bestfit.rec_len < length * multiplier) {
			break;
		}

		/* this multiplier means we only extremely rarely
		   search more than 50 or so records. At 50 records we
		   accept records up to 11 times larger than what we
		   want */
		multiplier *= 1.05;
	}

	if (bestfit.rec_ptr != 0) {
		/* rec currently holds whichever record was scanned last,
		   so reload the header of the record actually chosen. */
		if (tdb1_rec_free_read(tdb, bestfit.rec_ptr, rec) == -1) {
			goto fail;
		}

		newrec_ptr = tdb1_allocate_ofs(tdb, length, bestfit.rec_ptr,
					      rec, bestfit.last_ptr);
		tdb1_unlock(tdb, -1, F_WRLCK);
		return newrec_ptr;
	}

	/* we didn't find enough space. See if we can expand the
	   database and if we can then try again */
	if (tdb1_expand(tdb, length + sizeof(*rec)) == 0)
		goto again;
 fail:
	tdb1_unlock(tdb, -1, F_WRLCK);
	return 0;
}