hkey_t hash_table_find(const HashTable *const ht, const BinaryKmer key) { const BinaryKmer *ptr; size_t i; uint_fast32_t h; #ifdef HASH_PREFETCH uint_fast32_t h2 = binary_kmer_hash(key,ht->seed+0) & ht->hash_mask; __builtin_prefetch(ht_bckt_ptr(ht, h2), 0, 1); #endif for(i = 0; i < REHASH_LIMIT; i++) { #ifdef HASH_PREFETCH h = h2; if(ht->buckets[h][HT_BSIZE] == ht->bucket_size) { h2 = binary_kmer_hash(key,ht->seed+i+1) & ht->hash_mask; __builtin_prefetch(ht_bckt_ptr(ht, h2), 0, 1); } #else h = binary_kmer_hash(key,ht->seed+i) & ht->hash_mask; #endif ptr = hash_table_find_in_bucket_mt(ht, h, key); if(ptr != NULL) return (hkey_t)(ptr - ht->table); if(ht->buckets[h][HT_BSIZE] < ht->bucket_size) return HASH_NOT_FOUND; } rehash_error_exit(ht); }
/**
 * Remove every entry from the hash table, keeping its allocated storage.
 *
 * Zeroes the k-mer table and the per-bucket counters, then overwrites `*ht`
 * with a struct that keeps the storage pointers and dimensions and has
 * `num_kmers` and `collisions` reset to zero.
 *
 * NOTE(review): members of HashTable that are not named in the initializer
 * below (e.g. `seed`, which the lookup functions read) are zero-initialized
 * and therefore reset by the final copy -- confirm that this is intended.
 */
void hash_table_empty(HashTable *const ht)
{
  memset(ht->buckets, 0, ht->num_of_buckets * sizeof(uint8_t[2]));
  memset(ht->table, 0, ht->capacity * sizeof(BinaryKmer));

  HashTable cleared = {
    .table = ht->table,
    .buckets = ht->buckets,
    .num_of_buckets = ht->num_of_buckets,
    .bucket_size = ht->bucket_size,
    .hash_mask = ht->hash_mask,
    .capacity = ht->capacity,
    .num_kmers = 0,
    .collisions = {0}
  };

  memcpy(ht, &cleared, sizeof(cleared));
}

/**
 * Scan a single bucket for `bkmer`.
 *
 * The search key is compared with BKMER_SET_FLAG set in its first word, which
 * is how hash_table_insert_in_bucket() stores entries.
 *
 * @return pointer to the matching entry, or NULL if the bucket does not
 *         contain the k-mer
 */
static inline
const BinaryKmer* hash_table_find_in_bucket(const HashTable *const ht,
                                            uint_fast32_t bucket,
                                            BinaryKmer bkmer)
{
  const BinaryKmer *const first = ht_bckt_ptr(ht, bucket);
  const BinaryKmer *const last = first + hash_table_bsize(ht, bucket);
  const BinaryKmer *entry;

  // Stored entries carry the 'assigned' flag, so the probe must carry it too
  bkmer.b[0] |= BKMER_SET_FLAG;

  for(entry = first; entry < last; entry++) {
    if(binary_kmer_eq(bkmer, *entry))
      return entry;
  }

  return NULL; // Not found
}

/**
 * Store `bkmer` in the given bucket and return a pointer to the new entry.
 *
 * The bucket must have room (asserted). If the bucket has no holes the entry
 * is appended and the bucket's size counter grows; otherwise the first
 * unassigned slot is reused. The bucket's item counter is always incremented.
 *
 * The caller is responsible for incrementing ht->num_kmers.
 */
static inline
BinaryKmer* hash_table_insert_in_bucket(HashTable *ht, uint_fast32_t bucket,
                                        BinaryKmer bkmer)
{
  size_t used_slots = hash_table_bsize(ht, bucket);
  size_t live_items = hash_table_bitems(ht, bucket);
  ctx_assert(live_items < ht->bucket_size);
  ctx_assert(live_items <= used_slots);

  BinaryKmer *slot = ht_bckt_ptr(ht, bucket);

  bkmer.b[0] |= BKMER_SET_FLAG; // mark as assigned in the hash table

  if(live_items != used_slots)
  {
    // Fewer items than slots: reuse the first slot that is not assigned
    while(HASH_ENTRY_ASSIGNED(*slot)) slot++;
  }
  else
  {
    // No holes: append after the last used slot
    slot += used_slots;
    ht->buckets[bucket][HT_BSIZE]++;
  }

  *slot = bkmer;
  ht->buckets[bucket][HT_BITEMS]++;

  return slot;
}
/**
 * Thread-safe find-or-insert.
 *
 * Probes buckets using seeds `ht->seed + 0`, `ht->seed + 1`, ... (at most
 * REHASH_LIMIT attempts). Each bucket is locked with its bit in `bktlocks`
 * while it is searched and, if the key is absent and the bucket has room,
 * while the key is inserted.
 *
 * @param ht        hash table
 * @param key       k-mer to find or insert
 * @param found     set to true if the key was already present, false if it
 *                  was inserted by this call
 * @param bktlocks  per-bucket bit locks, indexed by bucket number
 * @return index of the (existing or new) entry within `ht->table`. If every
 *         attempt hits a full bucket, rehash_error_exit() is called
 *         (presumably it does not return -- confirm against its definition).
 */
hkey_t hash_table_find_or_insert_mt(HashTable *ht, const BinaryKmer key,
                                    bool *found, volatile uint8_t *bktlocks)
{
  const BinaryKmer *ptr;
  size_t i;
  uint_fast32_t h;

#ifdef HASH_PREFETCH
  bool next_hash_ready;
  uint_fast32_t h2 = binary_kmer_hash(key,ht->seed+0) & ht->hash_mask;
  __builtin_prefetch(ht_bckt_ptr(ht, h2), 0, 1);
#endif

  for(i = 0; i < REHASH_LIMIT; i++)
  {
#ifdef HASH_PREFETCH
    h = h2;
    // Unlocked peek at the bucket size: if it already looks full we will
    // probably need the next bucket, so hash and prefetch it early.
    next_hash_ready = (ht->buckets[h][HT_BSIZE] == ht->bucket_size);
    if(next_hash_ready)
    {
      h2 = binary_kmer_hash(key,ht->seed+i+1) & ht->hash_mask;
      __builtin_prefetch(ht_bckt_ptr(ht, h2), 0, 1);
    }
#else
    h = binary_kmer_hash(key,ht->seed+i) & ht->hash_mask;
#endif

    bitlock_yield_acquire(bktlocks, h);

    // We have the bucket lock so no one else can find or insert elements
    // therefore we can use non-threadsafe bucket functions
    // bitlock_acquire/release provide memory barriers
    ptr = hash_table_find_in_bucket_mt(ht, h, key);

    if(ptr != NULL) {
      *found = true;
      bitlock_release(bktlocks, h);
      return (hkey_t)(ptr - ht->table);
    }
    else if(ht->buckets[h][HT_BITEMS] < ht->bucket_size) {
      *found = false;
      ptr = hash_table_insert_in_bucket(ht, h, key);
      bitlock_release(bktlocks, h);
      // Counters are shared between threads: update them atomically
      __sync_add_and_fetch((volatile uint64_t*)&ht->collisions[i], 1);
      __sync_add_and_fetch((volatile uint64_t*)&ht->num_kmers, 1);
      return (hkey_t)(ptr - ht->table);
    }
    else {
      bitlock_release(bktlocks, h);
#ifdef HASH_PREFETCH
      // The bucket was filled by another thread after the unlocked peek
      // above, so the next hash has not been computed yet. Without this,
      // the next iteration would re-probe the same bucket and the hash for
      // seed+i+1 would be skipped, placing the key off its probe path.
      if(!next_hash_ready)
        h2 = binary_kmer_hash(key,ht->seed+i+1) & ht->hash_mask;
#endif
    }
  }

  rehash_error_exit(ht);
}
/**
 * Find `key` in the hash table, inserting it if it is absent.
 *
 * This variant takes no locks. Buckets are probed using seeds
 * `ht->seed + 0`, `ht->seed + 1`, ... (at most REHASH_LIMIT attempts); the
 * key is inserted into the first probed bucket that does not contain it and
 * has fewer than `ht->bucket_size` items.
 *
 * @param ht     hash table
 * @param key    k-mer to find or insert
 * @param found  set to true if the key was already present, false if it was
 *               inserted by this call
 * @return index of the (existing or new) entry within `ht->table`. If every
 *         attempt hits a full bucket, rehash_error_exit() is called
 *         (presumably it does not return -- confirm against its definition).
 */
hkey_t hash_table_find_or_insert(HashTable *ht, const BinaryKmer key,
                                 bool *found)
{
  const BinaryKmer *entry;
  size_t attempt;
  uint_fast32_t bucket;

#ifdef HASH_PREFETCH
  // Start fetching the first candidate bucket before entering the loop
  uint_fast32_t upcoming = binary_kmer_hash(key, ht->seed+0) & ht->hash_mask;
  __builtin_prefetch(ht_bckt_ptr(ht, upcoming), 0, 1);
#endif

  for(attempt = 0; attempt < REHASH_LIMIT; attempt++)
  {
#ifdef HASH_PREFETCH
    bucket = upcoming;

    // Bucket has no unused tail slots: get the next candidate on its way
    if(ht->buckets[bucket][HT_BSIZE] == ht->bucket_size)
    {
      upcoming = binary_kmer_hash(key, ht->seed+attempt+1) & ht->hash_mask;
      __builtin_prefetch(ht_bckt_ptr(ht, upcoming), 0, 1);
    }
#else
    bucket = binary_kmer_hash(key, ht->seed+attempt) & ht->hash_mask;
#endif

    entry = hash_table_find_in_bucket_mt(ht, bucket, key);

    if(entry != NULL)
    {
      *found = true;
      return (hkey_t)(entry - ht->table);
    }

    // No room in this bucket: move on to the next hash
    if(ht->buckets[bucket][HT_BITEMS] >= ht->bucket_size)
      continue;

    *found = false;
    entry = hash_table_insert_in_bucket(ht, bucket, key);
    ht->collisions[attempt]++; // only increment collisions when inserting
    ht->num_kmers++;
    return (hkey_t)(entry - ht->table);
  }

  rehash_error_exit(ht);
}
void hash_table_empty(HashTable *const ht) { size_t i; BinaryKmer *table = ht->table; for(i = 0; i < ht->capacity; i++) table[i] = unset_bkmer; memset(ht->buckets, 0, ht->num_of_buckets * sizeof(uint8_t[2])); HashTable data = { .table = ht->table, .num_of_buckets = ht->num_of_buckets, .hash_mask = ht->hash_mask, .bucket_size = ht->bucket_size, .capacity = ht->capacity, .buckets = ht->buckets, .num_kmers = 0, .collisions = {0}}; memcpy(ht, &data, sizeof(data)); } static inline const BinaryKmer* hash_table_find_in_bucket_mt(const HashTable *const ht, uint_fast32_t bucket, const BinaryKmer bkmer) { const BinaryKmer *ptr = ht_bckt_ptr(ht, bucket); const BinaryKmer *end = ptr + *(volatile __typeof(ht->buckets[0][0])*)&ht->buckets[bucket][HT_BSIZE]; while(ptr < end) { BinaryKmer tgt = *(volatile const BinaryKmer*)ptr; if(binary_kmers_are_equal(bkmer, tgt)) return ptr; ptr++; } return NULL; // Not found } /* static inline const BinaryKmer* hash_table_find_insert_in_bucket(const HashTable *const ht, uint_fast32_t bucket, const BinaryKmer bkmer, bool *found) { const BinaryKmer *ptr = ht_bckt_ptr(ht, bucket); const BinaryKmer *end = ptr + ht->buckets[bucket][HT_BSIZE]; const BinaryKmer *empty = NULL; for(; ptr < end && !binary_kmers_are_equal(bkmer, *ptr); ptr++) { if(!HASH_ENTRY_ASSIGNED(*ptr)) empty = ptr; } *found = (ptr < end); if(ptr == end && empty == NULL && ht->buckets[bucket][HT_BSIZE] < ht->bucket_size) { *empty = bkmer; ht->num_kmers++; ht->buckets[bucket][HT_BITEMS]++; ht->buckets[bucket][HT_BSIZE]++; } return ptr < end ? 
ptr : empty; } // Code to find/insert: // h = binary_kmer_hash(key,ht->seed+i) & ht->hash_mask; // ptr = hash_table_find_insert_in_bucket(ht, h, key, &f); // if(ptr != NULL) { // *found = f; // return ptr; // } */ // Remember to increment ht->num_kmers static inline BinaryKmer* hash_table_insert_in_bucket(HashTable *ht, uint_fast32_t bucket, const BinaryKmer bkmer) { ctx_assert(ht->buckets[bucket][HT_BITEMS] < ht->bucket_size); BinaryKmer *ptr = ht_bckt_ptr(ht, bucket); if(ht->buckets[bucket][HT_BSIZE] == ht->buckets[bucket][HT_BITEMS]) { ptr += ht->buckets[bucket][HT_BSIZE]; ht->buckets[bucket][HT_BSIZE]++; } else { // Find an entry that has been deleted from this bucket previously while(HASH_ENTRY_ASSIGNED(*ptr)) ptr++; } *ptr = bkmer; ht->buckets[bucket][HT_BITEMS]++; return ptr; }