// Looks up `key` in the hash table and inserts a fresh element for it when
// it is not already present.
//
// Each pass of the loop calls hash_table_find_in_bucket() with the current
// rehash count. While that helper reports overflow the rehash count is
// incremented and the lookup is retried; exceeding
// hash_table->max_rehash_tries is fatal (die()).
//
// Parameters:
//   key        - key to look up / insert.
//   found      - out-parameter; receives the result of the last lookup
//                (true when the key was already in the table). Must not be NULL.
//   hash_table - table to search and, if needed, modify. Must not be NULL.
//
// Returns a pointer to the slot in hash_table->table that holds the key.
// Side effects: increments hash_table->unique_kmers on insertion and
// hash_table->collisions[rehash] (indexed by the number of rehashes used).
Element * hash_table_find_or_insert(Key key, boolean * found, HashTable * hash_table){

  if (hash_table == NULL) {
    die("NULL table!");
  }

  // `found` is written on every pass, so reject a NULL out-parameter up front.
  if (found == NULL) {
    die("NULL found flag!");
  }

  Element element;
  Element * ret = NULL;
  int rehash = 0;
  boolean overflow;
  long long current_pos;

  do {
    *found = hash_table_find_in_bucket(key, &current_pos, &overflow, hash_table, rehash);

    if (! *found) {
      if (!overflow) {
        // The key is definitely nowhere in the hash table, so we are free to
        // insert it at current_pos.

        // Sanity check: the target slot must still be unused.
        if (!db_node_check_for_flag_ALL_OFF(&hash_table->table[current_pos])) {
          die("error trying to write on an occupied element\n");
        }

        // Build the element locally, then copy it into the table slot.
        element_initialise(&element, key, hash_table->kmer_size);
        element_assign(&(hash_table->table[current_pos]), &element);

        ret = &hash_table->table[current_pos];
        hash_table->unique_kmers++;
      }
      else {
        // Overflow -> rehash and try again.
        rehash++;
        if (rehash > hash_table->max_rehash_tries) {
          die("Dear user - you have not allocated enough memory to contain your sequence data. Either allocate more memory (have you done your calculations right? have you allowed for sequencing errors?), or threshold more harshly on quality score, and try again. Aborting mission.\n");
        }
      }
    }
    else {
      // Key already present: hand back the existing slot.
      ret = &hash_table->table[current_pos];
    }
  } while (overflow);

  // Record how many rehashes this lookup needed.
  hash_table->collisions[rehash]++;

  return ret;
}
//currently not used, and must add a test boolean hash_table_apply_or_insert(Key key, void (*f)(Element *), HashTable * hash_table){ if (hash_table == NULL) { puts("NULL table!"); exit(1); } long long current_pos; Element element; boolean overflow; int rehash=0; boolean found; do { found = hash_table_find_in_bucket(key,¤t_pos,&overflow, hash_table,rehash); if (!found) { if (!overflow) { //sanity check if (!element_check_for_flag_ALL_OFF(&hash_table->table[current_pos])){ printf("Out of bounds - trying to insert new node beyond end of bucket\n"); exit(1); } element_initialise(&element,key, hash_table->kmer_size); element_assign( &(hash_table->table[current_pos]), &element); hash_table->unique_kmers++; } else//overflow { rehash++; if (rehash>hash_table->max_rehash_tries) { fprintf(stderr,"too much rehashing!! Rehash=%d\n", rehash); exit(1); } } } else { f(&hash_table->table[current_pos]); } }while(overflow); return found; }
//this methods inserts an element in the next available bucket //it doesn't check whether another element with the same key is present in the table //used for fast loading when it is known that all the elements in the input have different key Element * hash_table_insert(Key key, HashTable * hash_table){ if (hash_table == NULL) { die("NULL table!"); } Element element; Element * ret = NULL; int rehash = 0; boolean inserted = false; do{ //add the rehash to the final bitfield in the BinaryKmer BinaryKmer bkmer_with_rehash_added; binary_kmer_initialise_to_zero(&bkmer_with_rehash_added); binary_kmer_assignment_operator(bkmer_with_rehash_added, *key); bkmer_with_rehash_added[NUMBER_OF_BITFIELDS_IN_BINARY_KMER-1] = bkmer_with_rehash_added[NUMBER_OF_BITFIELDS_IN_BINARY_KMER-1]+ (bitfield_of_64bits) rehash; uint32_t hashval = hash_value(&bkmer_with_rehash_added,hash_table->number_buckets); if (hash_table->next_element[hashval] < hash_table->bucket_size) { //can insert element long long current_pos = (long long) hashval * hash_table->bucket_size + (long long) hash_table->next_element[hashval] ; //position in hash table //sanity check if (!db_node_check_for_flag_ALL_OFF(&hash_table->table[current_pos])){ die("Out of bounds - trying to insert new node beyond end of bucket\n"); } element_initialise(&element,key, hash_table->kmer_size); element_assign( &(hash_table->table[current_pos]), &element); hash_table->unique_kmers++; hash_table->next_element[hashval]++; ret = &hash_table->table[current_pos]; inserted=true; } else {//rehash rehash++; if (rehash>hash_table->max_rehash_tries) { //fprintf(stderr,"too much rehashing!! Reserve more memory. Rehash=%d\n", rehash); die("Dear user - you have not allocated enough memory to contain your sequence data. Either allocate more memory (have you done your calculations right? have you allowed for sequencing errors?), or threshold more harshly on quality score, and try again. Aborting mission.\n"); } } } while (! inserted); return ret; }
// Applies `f` to the element stored under `key` if the key is already in the
// table; otherwise inserts a fresh element for `key` (and does NOT call `f`
// on it).
//
// The lookup is retried with an incremented rehash count for as long as
// hash_table_find_in_bucket() reports overflow; exceeding
// hash_table->max_rehash_tries is fatal (die()).
//
// Parameters:
//   key        - key to look up / insert.
//   f          - callback invoked on the existing element when the key is found.
//   hash_table - table to search and, if needed, modify. Must not be NULL.
//
// Returns the result of the last lookup: true if the key was already present
// (and `f` was applied), false if a new element was inserted.
boolean hash_table_apply_or_insert(Key key, void (*f)(Element *), HashTable * hash_table){

  if (hash_table == NULL) {
    die("NULL table!");
  }

  long long current_pos;
  Element element;
  boolean overflow;
  int rehash = 0;
  boolean found;

  do {
    found = hash_table_find_in_bucket(key, &current_pos, &overflow, hash_table, rehash);

    if (!found) {
      if (!overflow) {
        // Sanity check: the target slot must still be unused.
        if (!db_node_check_for_flag_ALL_OFF(&hash_table->table[current_pos])) {
          die("Out of bounds - trying to insert new node beyond end of bucket\n");
        }

        // Build the element locally, then copy it into the table slot.
        element_initialise(&element, key, hash_table->kmer_size);
        element_assign(&(hash_table->table[current_pos]), &element);
        hash_table->unique_kmers++;
      }
      else {
        // Overflow -> rehash and try again.
        rehash++;
        if (rehash > hash_table->max_rehash_tries) {
          die("Dear user - you have not allocated enough memory to contain your sequence data. \n"
              "Either allocate more memory (have you done your calculations right?\n"
              "Have you allowed for sequencing errors?), or threshold more harshly on\n"
              "quality score, and try again. Aborting mission.\n");
        }
      }
    }
    else {
      // Key already present: let the caller's callback act on it.
      f(&hash_table->table[current_pos]);
    }
  } while (overflow);

  return found;
}