enum TDB_ERROR tdb_store(struct tdb_context *tdb, struct tdb_data key, struct tdb_data dbuf, int flag) { struct hash_info h; tdb_off_t off; tdb_len_t old_room = 0; struct tdb_used_record rec; enum TDB_ERROR ecode; off = find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL); if (TDB_OFF_IS_ERR(off)) { return tdb->last_error = off; } /* Now we have lock on this hash bucket. */ if (flag == TDB_INSERT) { if (off) { ecode = TDB_ERR_EXISTS; goto out; } } else { if (off) { old_room = rec_data_length(&rec) + rec_extra_padding(&rec); if (old_room >= dbuf.dsize) { /* Can modify in-place. Easy! */ ecode = update_rec_hdr(tdb, off, key.dsize, dbuf.dsize, &rec, h.h); if (ecode != TDB_SUCCESS) { goto out; } ecode = update_data(tdb, off + sizeof(rec) + key.dsize, dbuf, old_room - dbuf.dsize); if (ecode != TDB_SUCCESS) { goto out; } tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_WRLCK); return tdb->last_error = TDB_SUCCESS; } } else { if (flag == TDB_MODIFY) { /* if the record doesn't exist and we are in TDB_MODIFY mode then we should fail the store */ ecode = TDB_ERR_NOEXIST; goto out; } } } /* If we didn't use the old record, this implies we're growing. */ ecode = replace_data(tdb, &h, key, dbuf, off, old_room, off); out: tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_WRLCK); return tdb->last_error = ecode; }
/* TAP test: tdb_expand() grows the file under failure injection.
 *
 * For each flag combination, opens a fresh database, takes a hash lock
 * (expand requires one), expands by 1 byte and then by 1024 bytes, and
 * checks the map grew by at least the requested amount times
 * TDB_EXTENSION_FACTOR.  failtest_suppress is dropped only around the
 * tdb_expand() calls so injected failures target expansion itself.
 */
int main(int argc, char *argv[])
{
	unsigned int i;
	uint64_t val;
	struct tdb_context *tdb;
	int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
			TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
			TDB_NOMMAP|TDB_CONVERT };

	/* 11 ok1() checks per flag combination, plus the final log check. */
	plan_tests(sizeof(flags) / sizeof(flags[0]) * 11 + 1);
	failtest_init(argc, argv);
	failtest_hook = block_repeat_failures;
	failtest_exit_check = exit_check_log;

	for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
		failtest_suppress = true;
		tdb = tdb_open("run-expand.tdb", flags[i],
			       O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
		if (!ok1(tdb))
			break;

		val = tdb->file->map_size;
		/* Need some hash lock for expand. */
		ok1(tdb_lock_hashes(tdb, 0, 1, F_WRLCK, TDB_LOCK_WAIT) == 0);
		/* Let injected failures hit the expand itself. */
		failtest_suppress = false;
		if (!ok1(tdb_expand(tdb, 1) == 0)) {
			failtest_suppress = true;
			tdb_close(tdb);
			break;
		}
		failtest_suppress = true;
		/* Even a 1-byte request grows by the extension factor. */
		ok1(tdb->file->map_size >= val + 1 * TDB_EXTENSION_FACTOR);
		ok1(tdb_unlock_hashes(tdb, 0, 1, F_WRLCK) == 0);
		ok1(tdb_check(tdb, NULL, NULL) == 0);

		/* Repeat with a larger (1024-byte) expansion. */
		val = tdb->file->map_size;
		ok1(tdb_lock_hashes(tdb, 0, 1, F_WRLCK, TDB_LOCK_WAIT) == 0);
		failtest_suppress = false;
		if (!ok1(tdb_expand(tdb, 1024) == 0)) {
			failtest_suppress = true;
			tdb_close(tdb);
			break;
		}
		failtest_suppress = true;
		ok1(tdb_unlock_hashes(tdb, 0, 1, F_WRLCK) == 0);
		ok1(tdb->file->map_size >= val + 1024 * TDB_EXTENSION_FACTOR);
		ok1(tdb_check(tdb, NULL, NULL) == 0);
		tdb_close(tdb);
	}
	ok1(tap_log_messages == 0);
	failtest_exit(exit_status());
}
enum TDB_ERROR tdb_fetch(struct tdb_context *tdb, struct tdb_data key, struct tdb_data *data) { tdb_off_t off; struct tdb_used_record rec; struct hash_info h; enum TDB_ERROR ecode; off = find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL); if (TDB_OFF_IS_ERR(off)) { return tdb->last_error = off; } if (!off) { ecode = TDB_ERR_NOEXIST; } else { data->dsize = rec_data_length(&rec); data->dptr = tdb_alloc_read(tdb, off + sizeof(rec) + key.dsize, data->dsize); if (TDB_PTR_IS_ERR(data->dptr)) { ecode = TDB_PTR_ERR(data->dptr); } else ecode = TDB_SUCCESS; } tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_RDLCK); return tdb->last_error = ecode; }
/* We lock twice, not very efficient. We could keep last key & tinfo cached. */
/* Advance traversal: replace *key (which is freed here) with the next key.
 * Returns TDB_SUCCESS, TDB_ERR_NOEXIST when *key was the last key, or an
 * error; the result is also stored in tdb->last_error.  Ownership: the
 * incoming key->dptr is always freed; on success the caller owns the new
 * key->dptr. */
enum TDB_ERROR tdb_nextkey(struct tdb_context *tdb, struct tdb_data *key)
{
	struct traverse_info tinfo;
	struct hash_info h;
	struct tdb_used_record rec;

	if (tdb->flags & TDB_VERSION1) {
		/* TDB1 compatibility path: delegate to tdb1_nextkey(). */
		struct tdb_data last_key = *key;
		/* Clear last_error first so we can tell whether
		 * tdb1_nextkey() set it. */
		tdb->last_error = TDB_SUCCESS;
		*key = tdb1_nextkey(tdb, last_key);

		/* Free the old key (saved before *key was overwritten). */
		free(last_key.dptr);

		/* TDB1 didn't set error for last key. */
		if (!key->dptr && tdb->last_error == TDB_SUCCESS) {
			tdb->last_error = TDB_ERR_NOEXIST;
		}
		return tdb->last_error;
	}

	/* Locate the current key to establish the traversal position. */
	tinfo.prev = find_and_lock(tdb, *key, F_RDLCK, &h, &rec, &tinfo);
	/* The old key is consumed regardless of outcome. */
	free(key->dptr);
	if (TDB_OFF_IS_ERR(tinfo.prev)) {
		return tdb->last_error = TDB_OFF_TO_ERR(tinfo.prev);
	}
	/* Drop the lock before walking: next_in_hash re-locks as needed. */
	tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_RDLCK);

	return tdb->last_error = next_in_hash(tdb, &tinfo, key, NULL);
}
/* Return the file offset of @key's record, or 0 if not found (0 is never a
 * valid record offset, so it doubles as the error/absent sentinel).
 * NOTE(review): this compares against the single value TDB_OFF_ERR rather
 * than using TDB_OFF_IS_ERR() as the neighbouring routines do — presumably
 * an older vintage of the error encoding; confirm before changing. */
static tdb_off_t tdb_offset(struct tdb_context *tdb, struct tdb_data key)
{
	tdb_off_t off;
	struct tdb_used_record rec;
	struct hash_info h;

	off = find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL);
	/* On error, find_and_lock holds no locks: report "not found". */
	if (unlikely(off == TDB_OFF_ERR))
		return 0;
	tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_RDLCK);
	return off;
}
int main(int argc, char *argv[]) { unsigned int i, j; struct tdb_context *tdb; int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP, TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT }; plan_tests(sizeof(flags) / sizeof(flags[0]) * 9 + 1); for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { TDB_DATA k; uint64_t size; bool was_empty = false; k.dptr = (void *)&j; k.dsize = sizeof(j); tdb = tdb_open("run-30-exhaust-before-expand.tdb", flags[i], O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); ok1(tdb); if (!tdb) continue; ok1(empty_freetable(tdb)); /* Need some hash lock for expand. */ ok1(tdb_lock_hashes(tdb, 0, 1, F_WRLCK, TDB_LOCK_WAIT) == 0); /* Create some free space. */ ok1(tdb_expand(tdb, 1) == 0); ok1(tdb_unlock_hashes(tdb, 0, 1, F_WRLCK) == 0); ok1(tdb_check(tdb, NULL, NULL) == 0); ok1(!empty_freetable(tdb)); size = tdb->map_size; /* Insert minimal-length records until we expand. */ for (j = 0; tdb->map_size == size; j++) { was_empty = empty_freetable(tdb); if (tdb_store(tdb, k, k, TDB_INSERT) != 0) err(1, "Failed to store record %i", j); } /* Would have been empty before expansion, but no longer. */ ok1(was_empty); ok1(!empty_freetable(tdb)); tdb_close(tdb); } ok1(tap_log_messages == 0); return exit_status(); }
/* Return true if @key exists in the database, false otherwise (including
 * on error — check tdb->last_error to distinguish "absent" from failure).
 *
 * Fix: convert the offset-encoded error from find_and_lock() with
 * TDB_OFF_TO_ERR() before storing it in tdb->last_error, matching
 * tdb_delete()/tdb_parse_record_(); the raw tdb_off_t is not a valid
 * enum TDB_ERROR value.
 */
bool tdb_exists(struct tdb_context *tdb, TDB_DATA key)
{
	tdb_off_t off;
	struct tdb_used_record rec;
	struct hash_info h;

	off = find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL);
	if (TDB_OFF_IS_ERR(off)) {
		/* On error, find_and_lock holds no locks: just record it. */
		tdb->last_error = TDB_OFF_TO_ERR(off);
		return false;
	}
	tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_RDLCK);

	tdb->last_error = TDB_SUCCESS;
	/* Nonzero offset means the record was found. */
	return off ? true : false;
}
/* Delete the record for @key.
 * Returns TDB_SUCCESS, TDB_ERR_NOEXIST when the key is absent, or another
 * error; the result is also stored in tdb->last_error (TDB2 path). */
enum TDB_ERROR tdb_delete(struct tdb_context *tdb, struct tdb_data key)
{
	tdb_off_t off;
	struct tdb_used_record rec;
	struct hash_info h;
	enum TDB_ERROR ecode;

	if (tdb->flags & TDB_VERSION1) {
		/* TDB1 compatibility: tdb1_delete() sets last_error itself. */
		if (tdb1_delete(tdb, key) == -1)
			return tdb->last_error;
		return TDB_SUCCESS;
	}

	/* Write-lock the key's hash range and locate the record. */
	off = find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL);
	if (TDB_OFF_IS_ERR(off)) {
		/* On error, find_and_lock holds no locks: just report. */
		return tdb->last_error = TDB_OFF_TO_ERR(off);
	}

	if (!off) {
		ecode = TDB_ERR_NOEXIST;
		goto unlock;
	}

	/* Unlink from the hash before freeing the space. */
	ecode = delete_from_hash(tdb, &h);
	if (ecode != TDB_SUCCESS) {
		goto unlock;
	}

	/* Free the deleted entry. */
	tdb->stats.frees++;
	/* Whole record = header + key + data + padding. */
	ecode = add_free_record(tdb, off,
				sizeof(struct tdb_used_record)
				+ rec_key_length(&rec)
				+ rec_data_length(&rec)
				+ rec_extra_padding(&rec),
				TDB_LOCK_WAIT, true);

	if (tdb->flags & TDB_SEQNUM)
		tdb_inc_seqnum(tdb);

unlock:
	tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_WRLCK);
	return tdb->last_error = ecode;
}
/* Call @parse on @key's data without copying it out of the database.
 * The data is only valid inside the callback (tdb_access_read/release
 * pair).  Returns the callback's result, TDB_ERR_NOEXIST when the key is
 * absent, or an error; also stored in tdb->last_error. */
enum TDB_ERROR tdb_parse_record_(struct tdb_context *tdb,
				 TDB_DATA key,
				 enum TDB_ERROR (*parse)(TDB_DATA k,
							 TDB_DATA d,
							 void *data),
				 void *data)
{
	tdb_off_t off;
	struct tdb_used_record rec;
	struct hash_info h;
	enum TDB_ERROR ecode;

	if (tdb->flags & TDB_VERSION1) {
		/* TDB1 compatibility path. */
		return tdb->last_error = tdb1_parse_record(tdb, key, parse,
							   data);
	}

	off = find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL);
	if (TDB_OFF_IS_ERR(off)) {
		/* On error, find_and_lock holds no locks: just report. */
		return tdb->last_error = TDB_OFF_TO_ERR(off);
	}

	if (!off) {
		ecode = TDB_ERR_NOEXIST;
	} else {
		const void *dptr;
		/* Map (or read) the data in place; false = read-only. */
		dptr = tdb_access_read(tdb, off + sizeof(rec) + key.dsize,
				       rec_data_length(&rec), false);
		if (TDB_PTR_IS_ERR(dptr)) {
			ecode = TDB_PTR_ERR(dptr);
		} else {
			TDB_DATA d = tdb_mkdata(dptr, rec_data_length(&rec));

			ecode = parse(key, d, data);
			/* Must release before dropping the hash lock. */
			tdb_access_release(tdb, dptr);
		}
	}

	tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_RDLCK);
	return tdb->last_error = ecode;
}
/* We lock twice, not very efficient. We could keep last key & tinfo cached. */
/* Older TDB_DATA-returning variant: return the key after @key, or tdb_null
 * when @key was the last key (ecode set to TDB_SUCCESS) or on error.
 * Caller owns the returned dptr. */
TDB_DATA tdb_nextkey(struct tdb_context *tdb, TDB_DATA key)
{
	struct traverse_info tinfo;
	struct hash_info h;
	struct tdb_used_record rec;

	/* Locate the current key to establish the traversal position. */
	tinfo.prev = find_and_lock(tdb, key, F_RDLCK, &h, &rec, &tinfo);
	if (unlikely(tinfo.prev == TDB_OFF_ERR))
		return tdb_null;
	/* Drop the lock before walking: next_in_hash re-locks as needed. */
	tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_RDLCK);

	switch (next_in_hash(tdb, F_RDLCK, &tinfo, &key, NULL)) {
	case 1:
		/* Found a next key: key was overwritten with it. */
		return key;
	case 0:
		/* End of traversal: not an error. */
		tdb->ecode = TDB_SUCCESS;
		/* Fall thru... */
	default:
		return tdb_null;
	}
}
/* TAP test: records can be allocated, found, fetched and deleted past the
 * 4GB boundary (requires 64-bit off_t; skipped otherwise).  Builds a
 * sparse ~4GB file, seeds the free table at the high offsets, and checks
 * a new record lands beyond 0xFFFFFFF0. */
int main(int argc, char *argv[])
{
	unsigned int i;
	struct tdb_context *tdb;
	int flags[] = { TDB_DEFAULT, TDB_NOMMAP,
			TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT };

	/* Can't address past 4GB with a 32-bit off_t: skip with a pass. */
	if (sizeof(off_t) <= 4) {
		plan_tests(1);
		pass("No 64 bit off_t");
		return exit_status();
	}

	/* 14 ok1() checks per flag combination. */
	plan_tests(sizeof(flags) / sizeof(flags[0]) * 14);
	for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
		off_t old_size;
		TDB_DATA k, d;
		struct hash_info h;
		struct tdb_used_record rec;
		tdb_off_t off;

		tdb = tdb_open("run-64-bit-tdb.tdb", flags[i],
			       O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
		ok1(tdb);
		if (!tdb)
			continue;

		old_size = tdb->file->map_size;

		/* This makes a sparse file */
		ok1(ftruncate(tdb->file->fd, 0xFFFFFFF0) == 0);
		/* Hand the whole sparse region to the free table. */
		ok1(add_free_record(tdb, old_size, 0xFFFFFFF0 - old_size,
				    TDB_LOCK_WAIT, false) == TDB_SUCCESS);

		/* Now add a little record past the 4G barrier. */
		ok1(tdb_expand_file(tdb, 100) == TDB_SUCCESS);
		ok1(add_free_record(tdb, 0xFFFFFFF0, 100, TDB_LOCK_WAIT, false)
		    == TDB_SUCCESS);

		ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);

		/* Test allocation path. */
		k = tdb_mkdata("key", 4);
		d = tdb_mkdata("data", 5);
		ok1(tdb_store(tdb, k, d, TDB_INSERT) == 0);
		ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);

		/* Make sure it put it at end as we expected. */
		off = find_and_lock(tdb, k, F_RDLCK, &h, &rec, NULL);
		ok1(off >= 0xFFFFFFF0);
		tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_RDLCK);

		/* Round-trip the record (includes the NUL terminator). */
		ok1(tdb_fetch(tdb, k, &d) == 0);
		ok1(d.dsize == 5);
		ok1(strcmp((char *)d.dptr, "data") == 0);
		free(d.dptr);

		ok1(tdb_delete(tdb, k) == 0);
		ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS);

		tdb_close(tdb);
	}

	/* We might get messages about mmap failing, so don't test
	 * tap_log_messages */
	return exit_status();
}
enum TDB_ERROR tdb_append(struct tdb_context *tdb, struct tdb_data key, struct tdb_data dbuf) { struct hash_info h; tdb_off_t off; struct tdb_used_record rec; tdb_len_t old_room = 0, old_dlen; unsigned char *newdata; struct tdb_data new_dbuf; enum TDB_ERROR ecode; off = find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL); if (TDB_OFF_IS_ERR(off)) { return tdb->last_error = off; } if (off) { old_dlen = rec_data_length(&rec); old_room = old_dlen + rec_extra_padding(&rec); /* Fast path: can append in place. */ if (rec_extra_padding(&rec) >= dbuf.dsize) { ecode = update_rec_hdr(tdb, off, key.dsize, old_dlen + dbuf.dsize, &rec, h.h); if (ecode != TDB_SUCCESS) { goto out; } off += sizeof(rec) + key.dsize + old_dlen; ecode = update_data(tdb, off, dbuf, rec_extra_padding(&rec)); goto out; } /* Slow path. */ newdata = malloc(key.dsize + old_dlen + dbuf.dsize); if (!newdata) { ecode = tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR, "tdb_append:" " failed to allocate %zu bytes", (size_t)(key.dsize + old_dlen + dbuf.dsize)); goto out; } ecode = tdb->methods->tread(tdb, off + sizeof(rec) + key.dsize, newdata, old_dlen); if (ecode != TDB_SUCCESS) { goto out_free_newdata; } memcpy(newdata + old_dlen, dbuf.dptr, dbuf.dsize); new_dbuf.dptr = newdata; new_dbuf.dsize = old_dlen + dbuf.dsize; } else { newdata = NULL; new_dbuf = dbuf; } /* If they're using tdb_append(), it implies they're growing record. */ ecode = replace_data(tdb, &h, key, new_dbuf, off, old_room, true); out_free_newdata: free(newdata); out: tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_WRLCK); return tdb->last_error = ecode; }
/* This is the core routine which searches the hashtable for an entry.
 * On error, no locks are held and -ve is returned.
 * Otherwise, hinfo is filled in (and the optional tinfo).
 * If not found, the return value is 0.
 * If found, the return value is the offset, and *rec is the record. */
tdb_off_t find_and_lock(struct tdb_context *tdb,
			struct tdb_data key,
			int ltype,
			struct hash_info *h,
			struct tdb_used_record *rec,
			struct traverse_info *tinfo)
{
	uint32_t i, group;
	tdb_off_t hashtable;
	enum TDB_ERROR ecode;

	h->h = tdb_hash(tdb, key.dptr, key.dsize);
	h->hash_used = 0;
	/* Top level: consume hash bits to pick the group, then the bucket
	 * within that group. */
	group = use_bits(h, TDB_TOPLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS);
	h->home_bucket = use_bits(h, TDB_HASH_GROUP_BITS);

	/* Lock the hash range covering this group. */
	h->hlock_start = hlock_range(group, &h->hlock_range);
	ecode = tdb_lock_hashes(tdb, h->hlock_start, h->hlock_range, ltype,
				TDB_LOCK_WAIT);
	if (ecode != TDB_SUCCESS) {
		/* NOTE(review): the negative enum is returned as a tdb_off_t
		 * here — this is the offset-encoded error the callers test
		 * with TDB_OFF_IS_ERR(). */
		return ecode;
	}

	hashtable = offsetof(struct tdb_header, hashtable);
	if (tinfo) {
		/* Seed traversal state at the top level. */
		tinfo->toplevel_group = group;
		tinfo->num_levels = 1;
		tinfo->levels[0].entry = 0;
		tinfo->levels[0].hashtable = hashtable
			+ (group << TDB_HASH_GROUP_BITS) * sizeof(tdb_off_t);
		tinfo->levels[0].total_buckets = 1 << TDB_HASH_GROUP_BITS;
	}

	/* Descend subhash levels until the 64 hash bits are exhausted. */
	while (h->hash_used <= 64) {
		/* Read in the hash group. */
		h->group_start = hashtable
			+ group * (sizeof(tdb_off_t) << TDB_HASH_GROUP_BITS);

		ecode = tdb_read_convert(tdb, h->group_start, &h->group,
					 sizeof(h->group));
		if (ecode != TDB_SUCCESS) {
			goto fail;
		}

		/* Pointer to another hash table?  Go down... */
		if (is_subhash(h->group[h->home_bucket])) {
			/* Subhash table lives just past its record header. */
			hashtable = (h->group[h->home_bucket] & TDB_OFF_MASK)
				+ sizeof(struct tdb_used_record);
			if (tinfo) {
				/* When we come back, use *next* bucket */
				tinfo->levels[tinfo->num_levels-1].entry
					+= h->home_bucket + 1;
			}
			/* Consume more hash bits for the sublevel. */
			group = use_bits(h, TDB_SUBLEVEL_HASH_BITS
					 - TDB_HASH_GROUP_BITS);
			h->home_bucket = use_bits(h, TDB_HASH_GROUP_BITS);
			if (tinfo) {
				tinfo->levels[tinfo->num_levels].hashtable
					= hashtable;
				tinfo->levels[tinfo->num_levels].total_buckets
					= 1 << TDB_SUBLEVEL_HASH_BITS;
				tinfo->levels[tinfo->num_levels].entry
					= group << TDB_HASH_GROUP_BITS;
				tinfo->num_levels++;
			}
			continue;
		}

		/* It's in this group: search (until 0 or all searched) */
		/* Linear probe around the group, starting at home_bucket. */
		for (i = 0, h->found_bucket = h->home_bucket;
		     i < (1 << TDB_HASH_GROUP_BITS);
		     i++, h->found_bucket = ((h->found_bucket+1)
					     % (1 << TDB_HASH_GROUP_BITS))) {
			tdb_bool_err berr;
			if (is_subhash(h->group[h->found_bucket]))
				continue;

			/* Empty bucket terminates the probe: not here. */
			if (!h->group[h->found_bucket])
				break;

			berr = match(tdb, h, &key, h->group[h->found_bucket],
				     rec);
			if (berr < 0) {
				ecode = berr;
				goto fail;
			}
			if (berr) {
				if (tinfo) {
					tinfo->levels[tinfo->num_levels-1].entry
						+= h->found_bucket;
				}
				/* Found: strip flag bits to get the offset. */
				return h->group[h->found_bucket]
					& TDB_OFF_MASK;
			}
		}
		/* Didn't find it: h indicates where it would go. */
		return 0;
	}

	/* All 64 hash bits used: fall back to the collision chain. */
	return find_in_chain(tdb, key, hashtable, h, rec, tinfo);

fail:
	/* Error after locking: release before reporting. */
	tdb_unlock_hashes(tdb, h->hlock_start, h->hlock_range, ltype);
	return ecode;
}