Example #1
/* Generic record header check. */
static bool tdb_check_record(struct tdb_context *tdb,
			     tdb_off_t off,
			     const struct tdb_record *rec)
{
	tdb_off_t tailer;

	/* Check rec->next: 0 or points to record offset, aligned. */
	if (rec->next > 0 && rec->next < TDB_DATA_START(tdb->hash_size)){
		TDB_LOG((tdb, TDB_DEBUG_ERROR,
			 "Record offset %u too small next %u\n",
			 off, rec->next));
		goto corrupt;
	}
	if (rec->next + sizeof(*rec) < rec->next) {
		TDB_LOG((tdb, TDB_DEBUG_ERROR,
			 "Record offset %u too large next %u\n",
			 off, rec->next));
		goto corrupt;
	}
	if ((rec->next % TDB_ALIGNMENT) != 0) {
		TDB_LOG((tdb, TDB_DEBUG_ERROR,
			 "Record offset %u misaligned next %u\n",
			 off, rec->next));
		goto corrupt;
	}
	if (tdb->methods->tdb_oob(tdb, rec->next, sizeof(*rec), 0))
		goto corrupt;

	/* Check rec_len: similar to rec->next, implies next record. */
	if ((rec->rec_len % TDB_ALIGNMENT) != 0) {
		TDB_LOG((tdb, TDB_DEBUG_ERROR,
			 "Record offset %u misaligned length %u\n",
			 off, rec->rec_len));
		goto corrupt;
	}
	/* Must fit tailer. */
	if (rec->rec_len < sizeof(tailer)) {
		TDB_LOG((tdb, TDB_DEBUG_ERROR,
			 "Record offset %u too short length %u\n",
			 off, rec->rec_len));
		goto corrupt;
	}
	/* OOB allows "right at the end" access, so this works for last rec. */
	if (tdb->methods->tdb_oob(tdb, off, sizeof(*rec)+rec->rec_len, 0))
		goto corrupt;

	/* Check tailer. */
	if (tdb_ofs_read(tdb, off+sizeof(*rec)+rec->rec_len-sizeof(tailer),
			 &tailer) == -1)
		goto corrupt;
	if (tailer != sizeof(*rec) + rec->rec_len) {
		TDB_LOG((tdb, TDB_DEBUG_ERROR,
			 "Record offset %u invalid tailer\n", off));
		goto corrupt;
	}

	return true;

corrupt:
	tdb->ecode = TDB_ERR_CORRUPT;
	return false;
}
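The checks above lean on tdb's record layout: every record, used or free, ends with a tailer word holding the record's total on-disk size, which is what lets the free-list code coalesce backwards. A minimal sketch of that invariant follows; the helper names are hypothetical and not part of tdb.

/* Hypothetical helpers, assuming the tdb_record/tdb_off_t types above:
 * the tailer occupies the last sizeof(tdb_off_t) bytes of the rec_len
 * area and stores the full record size, header included. */
static tdb_off_t example_tailer_ofs(tdb_off_t rec_off,
				    const struct tdb_record *rec)
{
	/* same offset arithmetic as the tdb_ofs_read() call above */
	return rec_off + sizeof(*rec) + rec->rec_len - sizeof(tdb_off_t);
}

static tdb_off_t example_expected_tailer(const struct tdb_record *rec)
{
	/* the value tdb_check_record() expects to find there */
	return sizeof(*rec) + rec->rec_len;
}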
Example #2
_PUBLIC_ int tdb_check(struct tdb_context *tdb,
	      int (*check)(TDB_DATA key, TDB_DATA data, void *private_data),
	      void *private_data)
{
	unsigned int h;
	unsigned char **hashes;
	tdb_off_t off, recovery_start;
	struct tdb_record rec;
	bool found_recovery = false;
	tdb_len_t dead;
	bool locked;

	/* Read-only databases use no locking at all: it's best-effort.
	 * We may have a write lock already, so skip that case too. */
	if (tdb->read_only || tdb->allrecord_lock.count != 0) {
		locked = false;
	} else {
		if (tdb_lockall_read(tdb) == -1)
			return -1;
		locked = true;
	}

	/* Make sure we know true size of the underlying file. */
	tdb->methods->tdb_oob(tdb, tdb->map_size, 1, 1);

	/* Header must be OK: also gets us the recovery ptr, if any. */
	if (!tdb_check_header(tdb, &recovery_start))
		goto unlock;

	/* We should have the whole header, too. */
	if (tdb->map_size < TDB_DATA_START(tdb->hash_size)) {
		tdb->ecode = TDB_ERR_CORRUPT;
		TDB_LOG((tdb, TDB_DEBUG_ERROR, "File too short for hashes\n"));
		goto unlock;
	}

	/* One big malloc: pointers then bit arrays. */
	hashes = (unsigned char **)calloc(
			1, sizeof(hashes[0]) * (1+tdb->hash_size)
			+ BITMAP_BITS / CHAR_BIT * (1+tdb->hash_size));
	if (!hashes) {
		tdb->ecode = TDB_ERR_OOM;
		goto unlock;
	}

	/* Initialize pointers */
	hashes[0] = (unsigned char *)(&hashes[1+tdb->hash_size]);
	for (h = 1; h < 1+tdb->hash_size; h++)
		hashes[h] = hashes[h-1] + BITMAP_BITS / CHAR_BIT;

	/* Freelist and hash headers are all in a row: read them. */
	for (h = 0; h < 1+tdb->hash_size; h++) {
		if (tdb_ofs_read(tdb, FREELIST_TOP + h*sizeof(tdb_off_t),
				 &off) == -1)
			goto free;
		if (off)
			record_offset(hashes[h], off);
	}

	/* For each record, read it in and check it's ok. */
	for (off = TDB_DATA_START(tdb->hash_size);
	     off < tdb->map_size;
	     off += sizeof(rec) + rec.rec_len) {
		if (tdb->methods->tdb_read(tdb, off, &rec, sizeof(rec),
					   DOCONV()) == -1)
			goto free;
		switch (rec.magic) {
		case TDB_MAGIC:
		case TDB_DEAD_MAGIC:
			if (!tdb_check_used_record(tdb, off, &rec, hashes,
						   check, private_data))
				goto free;
			break;
		case TDB_FREE_MAGIC:
			if (!tdb_check_free_record(tdb, off, &rec, hashes))
				goto free;
			break;
		/* If we crash after ftruncate, we can get zeroes or fill. */
		case TDB_RECOVERY_INVALID_MAGIC:
		case 0x42424242:
			if (recovery_start == off) {
				found_recovery = true;
				break;
			}
			dead = tdb_dead_space(tdb, off);
			if (dead < sizeof(rec))
				goto corrupt;

			TDB_LOG((tdb, TDB_DEBUG_ERROR,
				 "Dead space at %u-%u (of %u)\n",
				 off, off + dead, tdb->map_size));
			rec.rec_len = dead - sizeof(rec);
			break;
		case TDB_RECOVERY_MAGIC:
			if (recovery_start != off) {
				TDB_LOG((tdb, TDB_DEBUG_ERROR,
					 "Unexpected recovery record at offset %u\n",
					 off));
				goto free;
			}
			found_recovery = true;
			break;
		default: ;
		corrupt:
			tdb->ecode = TDB_ERR_CORRUPT;
			TDB_LOG((tdb, TDB_DEBUG_ERROR,
				 "Bad magic 0x%x at offset %u\n",
				 rec.magic, off));
			goto free;
		}
	}

	/* Now, hashes should all be empty: each record exists and is referred
	 * to by one other. */
	for (h = 0; h < 1+tdb->hash_size; h++) {
		unsigned int i;
		for (i = 0; i < BITMAP_BITS / CHAR_BIT; i++) {
			if (hashes[h][i] != 0) {
				tdb->ecode = TDB_ERR_CORRUPT;
				TDB_LOG((tdb, TDB_DEBUG_ERROR,
					 "Hashes do not match records\n"));
				goto free;
			}
		}
	}

	/* We must have found recovery area if there was one. */
	if (recovery_start != 0 && !found_recovery) {
		TDB_LOG((tdb, TDB_DEBUG_ERROR,
			 "Expected a recovery area at %u\n",
			 recovery_start));
		goto free;
	}

	free(hashes);
	if (locked) {
		tdb_unlockall_read(tdb);
	}
	return 0;

free:
	free(hashes);
unlock:
	if (locked) {
		tdb_unlockall_read(tdb);
	}
	return -1;
}
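tdb_check() is the public entry point shown above; callers hand it an optional per-record callback. A minimal caller sketch, assuming the database is already open; the callback and wrapper names are illustrative, and the assumption here is that a non-zero callback return is treated as a failed check.

/* Counts records while tdb_check() walks the file. Illustrative names;
 * only tdb_check() and tdb_error() are real tdb API calls. */
static int count_record(TDB_DATA key, TDB_DATA data, void *private_data)
{
	unsigned int *count = (unsigned int *)private_data;
	(*count)++;
	return 0;	/* assumption: non-zero would make the check fail */
}

static int verify_db(struct tdb_context *tdb)
{
	unsigned int count = 0;

	if (tdb_check(tdb, count_record, &count) != 0) {
		/* tdb_error(tdb) reports TDB_ERR_CORRUPT, TDB_ERR_OOM, ... */
		return -1;
	}
	return 0;
}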
Example #3
File: tdb.c Project: gojdic/samba
/*
  wipe the entire database, deleting all records. This can be done
  very fast by using a global lock. The entire data portion of the
  file becomes a single entry in the freelist.

  This code carefully steps around the recovery area, leaving it alone
 */
int tdb_wipe_all(struct tdb_context *tdb)
{
	int i;
	tdb_off_t offset = 0;
	ssize_t data_len;
	tdb_off_t recovery_head;
	tdb_len_t recovery_size = 0;

	if (tdb_lockall(tdb) != 0) {
		return -1;
	}

	/* see if the tdb has a recovery area, and remember its size
	   if so. We don't want to lose this as otherwise each
	   tdb_wipe_all() in a transaction will increase the size of
	   the tdb by the size of the recovery area */
	if (tdb_ofs_read(tdb, TDB_RECOVERY_HEAD, &recovery_head) == -1) {
		TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_wipe_all: failed to read recovery head\n"));
		goto failed;
	}

	if (recovery_head != 0) {
		struct list_struct rec;
		if (tdb->methods->tdb_read(tdb, recovery_head, &rec, sizeof(rec), DOCONV()) == -1) {
			TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_wipe_all: failed to read recovery record\n"));
			/* take the failed: path so the allrecord lock is released */
			goto failed;
		}
		recovery_size = rec.rec_len + sizeof(rec);
	}

	/* wipe the hashes */
	for (i=0;i<tdb->header.hash_size;i++) {
		if (tdb_ofs_write(tdb, TDB_HASH_TOP(i), &offset) == -1) {
			TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write hash %d\n", i));
			goto failed;
		}
	}

	/* wipe the freelist */
	if (tdb_ofs_write(tdb, FREELIST_TOP, &offset) == -1) {
		TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write freelist\n"));
		goto failed;
	}

	/* add all the rest of the file to the freelist, possibly leaving a gap 
	   for the recovery area */
	if (recovery_size == 0) {
		/* the simple case - the whole file can be used as a freelist */
		data_len = (tdb->map_size - TDB_DATA_START(tdb->header.hash_size));
		if (tdb_free_region(tdb, TDB_DATA_START(tdb->header.hash_size), data_len) != 0) {
			goto failed;
		}
	} else {
		/* we need to add two freelist entries - one on either
		   side of the recovery area 

		   Note that we cannot shift the recovery area during
		   this operation. Only the transaction.c code may
		   move the recovery area or we risk subtle data
		   corruption
		*/
		data_len = (recovery_head - TDB_DATA_START(tdb->header.hash_size));
		if (tdb_free_region(tdb, TDB_DATA_START(tdb->header.hash_size), data_len) != 0) {
			goto failed;
		}
		/* and the 2nd free list entry after the recovery area - if any */
		data_len = tdb->map_size - (recovery_head+recovery_size);
		if (tdb_free_region(tdb, recovery_head+recovery_size, data_len) != 0) {
			goto failed;
		}
	}

	if (tdb_unlockall(tdb) != 0) {
		TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to unlock\n"));
		goto failed;
	}

	return 0;

failed:
	tdb_unlockall(tdb);
	return -1;
}
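The interesting part is the split around the recovery area. A hypothetical helper that mirrors the arithmetic above (names and struct are illustrative, not tdb API) makes the two freelist regions easier to see.

/* Given the wiped file's geometry, compute the two regions that
 * tdb_wipe_all() hands to tdb_free_region(): one before the recovery
 * area and one after it, leaving the recovery area itself untouched. */
struct wipe_regions {
	tdb_off_t before_off;	/* == TDB_DATA_START(hash_size) */
	tdb_len_t before_len;
	tdb_off_t after_off;	/* first byte past the recovery area */
	tdb_len_t after_len;
};

static struct wipe_regions example_wipe_regions(tdb_off_t data_start,
						tdb_off_t recovery_head,
						tdb_len_t recovery_size,
						tdb_off_t map_size)
{
	struct wipe_regions r;

	r.before_off = data_start;
	r.before_len = recovery_head - data_start;
	r.after_off  = recovery_head + recovery_size;
	r.after_len  = map_size - (recovery_head + recovery_size);
	return r;
}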
Example #4
/* Uses traverse lock: 0 = finish, -1 = error, other = record offset */
static int tdb_next_lock(struct tdb_context *tdb, struct tdb_traverse_lock *tlock,
			 struct list_struct *rec)
{
	int want_next = (tlock->off != 0);

	/* Lock each chain from the start one. */
	for (; tlock->hash < tdb->header.hash_size; tlock->hash++) {
		if (!tlock->off && tlock->hash != 0) {
			/* this is an optimisation for the common case where
			   the hash chain is empty, which is particularly
			   common for the use of tdb with ldb, where large
			   hashes are used. In that case we spend most of our
			   time in tdb_brlock(), locking empty hash chains.
			   
			   To avoid this, we do an unlocked pre-check to see
			   if the hash chain is empty before starting to look
			   inside it. If it is empty then we can avoid that
			   hash chain. If it isn't empty then we can't believe
			   the value we get back, as we read it without a
			   lock, so instead we get the lock and re-fetch the
			   value below.
			   
			   Notice that not doing this optimisation on the
			   first hash chain is critical. We must guarantee
			   that we have done at least one fcntl lock at the
			   start of a search to guarantee that memory is
			   coherent on SMP systems. If records are added by
			   others during the search then that's OK, and we
			   could possibly miss those with this trick, but we
			   could miss them anyway without this trick, so the
			   semantics don't change.
			   
			   With a non-indexed ldb search this trick gains us a
			   factor of around 80 in speed on a linux 2.6.x
			   system (testing using ldbtest).
			*/
			tdb->methods->next_hash_chain(tdb, &tlock->hash);
			if (tlock->hash == tdb->header.hash_size) {
				continue;
			}
		}

		if (tdb_lock(tdb, tlock->hash, tlock->lock_rw) == -1)
			return -1;

		/* No previous record?  Start at top of chain. */
		if (!tlock->off) {
			if (tdb_ofs_read(tdb, TDB_HASH_TOP(tlock->hash),
				     &tlock->off) == -1)
				goto fail;
		} else {
			/* Otherwise unlock the previous record. */
			if (tdb_unlock_record(tdb, tlock->off) != 0)
				goto fail;
		}

		if (want_next) {
			/* We have offset of old record: grab next */
			if (tdb_rec_read(tdb, tlock->off, rec) == -1)
				goto fail;
			tlock->off = rec->next;
		}

		/* Iterate through chain */
		while (tlock->off) {
			tdb_off_t current;
			if (tdb_rec_read(tdb, tlock->off, rec) == -1)
				goto fail;

			/* Detect infinite loops. From "Shlomi Yaakobovich" <*****@*****.**>. */
			if (tlock->off == rec->next) {
				TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_next_lock: loop detected.\n"));
				goto fail;
			}

			if (!TDB_DEAD(rec)) {
				/* Woohoo: we found one! */
				if (tdb_lock_record(tdb, tlock->off) != 0)
					goto fail;
				return tlock->off;
			}

			/* Try to clean dead ones from old traverses */
			current = tlock->off;
			tlock->off = rec->next;
			if (!(tdb->read_only || tdb->traverse_read) && 
			    tdb_do_delete(tdb, current, rec) != 0)
				goto fail;
		}
		tdb_unlock(tdb, tlock->hash, tlock->lock_rw);
		want_next = 0;
	}
	/* We finished iteration without finding anything */
	return TDB_ERRCODE(TDB_SUCCESS, 0);

 fail:
	tlock->off = 0;
	if (tdb_unlock(tdb, tlock->hash, tlock->lock_rw) != 0)
		TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_next_lock: On error unlock failed!\n"));
	return -1;
}
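tdb_next_lock() is the chain-by-chain iterator underneath tdb's traversal API; applications normally go through tdb_traverse() or tdb_traverse_read() instead. A minimal caller sketch, assuming <stdio.h> and the tdb headers are included; the callback name is illustrative, and a non-zero callback return is taken to stop the traversal.

/* Print the size of every live record. dump_one/dump_all are illustrative;
 * tdb_traverse_read() and tdb_errorstr() are the real public calls. */
static int dump_one(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data,
		    void *private_data)
{
	printf("key %u bytes, data %u bytes\n",
	       (unsigned int)key.dsize, (unsigned int)data.dsize);
	return 0;	/* keep iterating */
}

static void dump_all(struct tdb_context *tdb)
{
	/* the read-only traversal takes read locks only, so it cannot
	 * delete dead records the way the write path above can */
	if (tdb_traverse_read(tdb, dump_one, NULL) == -1) {
		fprintf(stderr, "traverse failed: %s\n", tdb_errorstr(tdb));
	}
}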
Example #5
File: tdb.c Project: gojdic/samba
/* store an element in the database, replacing any existing element
   with the same key 

   return 0 on success, -1 on failure
*/
int tdb_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
{
	struct list_struct rec;
	uint32_t hash;
	tdb_off_t rec_ptr;
	char *p = NULL;
	int ret = -1;

	if (tdb->read_only || tdb->traverse_read) {
		tdb->ecode = TDB_ERR_RDONLY;
		return -1;
	}

	/* find which hash bucket it is in */
	hash = tdb->hash_fn(&key);
	if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
		return -1;

	/* check for it existing, on insert. */
	if (flag == TDB_INSERT) {
		if (tdb_exists_hash(tdb, key, hash)) {
			tdb->ecode = TDB_ERR_EXISTS;
			goto fail;
		}
	} else {
		/* first try in-place update, on modify or replace. */
		if (tdb_update_hash(tdb, key, hash, dbuf) == 0) {
			goto done;
		}
		if (tdb->ecode == TDB_ERR_NOEXIST &&
		    flag == TDB_MODIFY) {
			/* if the record doesn't exist and we are in TDB_MODIFY mode then
			 we should fail the store */
			goto fail;
		}
	}
	/* reset the error code potentially set by tdb_update_hash() */
	tdb->ecode = TDB_SUCCESS;

	/* delete any existing record - if it doesn't exist we don't
           care.  Doing this first reduces fragmentation, and avoids
           coalescing with `allocated' block before it's updated. */
	if (flag != TDB_INSERT)
		tdb_delete_hash(tdb, key, hash);

	/* Copy key+value *before* allocating free space in case malloc
	   fails and we are left with a dead spot in the tdb. */

	if (!(p = (char *)malloc(key.dsize + dbuf.dsize))) {
		tdb->ecode = TDB_ERR_OOM;
		goto fail;
	}

	memcpy(p, key.dptr, key.dsize);
	if (dbuf.dsize)
		memcpy(p+key.dsize, dbuf.dptr, dbuf.dsize);

	if (tdb->max_dead_records != 0) {
		/*
		 * Allow for some dead records per hash chain, look if we can
		 * find one that can hold the new record. We need enough space
		 * for key, data and tailer. If we find one, we don't have to
		 * consult the central freelist.
		 */
		rec_ptr = tdb_find_dead(
			tdb, hash, &rec,
			key.dsize + dbuf.dsize + sizeof(tdb_off_t));

		if (rec_ptr != 0) {
			rec.key_len = key.dsize;
			rec.data_len = dbuf.dsize;
			rec.full_hash = hash;
			rec.magic = TDB_MAGIC;
			if (tdb_rec_write(tdb, rec_ptr, &rec) == -1
			    || tdb->methods->tdb_write(
				    tdb, rec_ptr + sizeof(rec),
				    p, key.dsize + dbuf.dsize) == -1) {
				goto fail;
			}
			goto done;
		}
	}

	/*
	 * We have to allocate some space from the freelist, so this means we
	 * have to lock it. Use the chance to purge all the DEAD records from
	 * the hash chain under the freelist lock.
	 */

	if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
		goto fail;
	}

	if ((tdb->max_dead_records != 0)
	    && (tdb_purge_dead(tdb, hash) == -1)) {
		tdb_unlock(tdb, -1, F_WRLCK);
		goto fail;
	}

	/* we have to allocate some space */
	rec_ptr = tdb_allocate(tdb, key.dsize + dbuf.dsize, &rec);

	tdb_unlock(tdb, -1, F_WRLCK);

	if (rec_ptr == 0) {
		goto fail;
	}

	/* Read hash top into next ptr */
	if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec.next) == -1)
		goto fail;

	rec.key_len = key.dsize;
	rec.data_len = dbuf.dsize;
	rec.full_hash = hash;
	rec.magic = TDB_MAGIC;

	/* write out and point the top of the hash chain at it */
	if (tdb_rec_write(tdb, rec_ptr, &rec) == -1
	    || tdb->methods->tdb_write(tdb, rec_ptr+sizeof(rec), p, key.dsize+dbuf.dsize)==-1
	    || tdb_ofs_write(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1) {
		/* Need to tdb_unallocate() here */
		goto fail;
	}

 done:
	ret = 0;
 fail:
	if (ret == 0) {
		tdb_increment_seqnum(tdb);
	}

	SAFE_FREE(p); 
	tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
	return ret;
}
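A minimal caller sketch for the three flag modes handled above: TDB_REPLACE overwrites or creates, TDB_INSERT fails if the key already exists, and TDB_MODIFY fails if it does not. The helper name and payload are illustrative; <string.h> is assumed for strlen().

static int store_example(struct tdb_context *tdb)
{
	TDB_DATA key, val;

	key.dptr  = (unsigned char *)"greeting";
	key.dsize = strlen("greeting");
	val.dptr  = (unsigned char *)"hello";
	val.dsize = strlen("hello");

	/* create or overwrite */
	if (tdb_store(tdb, key, val, TDB_REPLACE) == -1) {
		return -1;	/* tdb_error(tdb) gives the reason */
	}
	/* inserting the same key again is expected to fail with
	 * TDB_ERR_EXISTS, per the TDB_INSERT branch above */
	if (tdb_store(tdb, key, val, TDB_INSERT) == 0) {
		return -1;
	}
	return 0;
}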