コード例 #1
0
// Looks up key in the single bucket selected by hashing the key with `rehash`
// added to its last bitfield.
//
// Parameters:
//   key         - key to search for
//   current_pos - out: index into hash_table->table (see outcomes below)
//   overflow    - out: set to true only when the whole bucket was scanned
//                 without finding the key or an empty slot
//   hash_table  - table to search
//   rehash      - rehash attempt number, mixed into the hashed copy of the key
//
// Outcomes:
//   key found                 -> returns true,  *current_pos = slot holding the key
//   key absent, bucket has an
//   empty (ALL_OFF) slot      -> returns false, *current_pos = first empty slot, *overflow = false
//   key absent, bucket full   -> returns false, *overflow = true, and *current_pos is
//                                one past the last slot of the bucket (do not dereference)
//
// Exits the process if the computed position falls outside the table.
// NOTE(review): hashval is stored in an int; confirm that hash_value() cannot
// return a value that does not fit in int for large number_buckets.
boolean hash_table_find_in_bucket(Key key, long long * current_pos, boolean * overflow, HashTable * hash_table, int rehash){
	
	
	//add the rehash to the final bitfield in the BinaryKmer
	BinaryKmer bkmer_with_rehash_added;
	binary_kmer_initialise_to_zero(&bkmer_with_rehash_added);
	binary_kmer_assignment_operator(bkmer_with_rehash_added, *key);
	bkmer_with_rehash_added[NUMBER_OF_BITFIELDS_IN_BINARY_KMER-1] =   bkmer_with_rehash_added[NUMBER_OF_BITFIELDS_IN_BINARY_KMER-1]+ (bitfield_of_64bits) rehash;
	
	int hashval = hash_value(&bkmer_with_rehash_added,hash_table->number_buckets);
	
	
	boolean found = false;
	int i=0;                     //position in bucket
	*overflow    = false;
	*current_pos   = (long long) hashval * hash_table->bucket_size;   //position in hash table
	
	while( (i<hash_table->bucket_size) &&   // still within the bucket
		  (!found)
		  )
    {
		
		//sanity check -- must run BEFORE the table is read at *current_pos,
		//otherwise it cannot prevent the out-of-bounds access it is reporting.
		//The product is widened to long long so it cannot overflow a narrower type.
		if (*current_pos >= (long long) hash_table->number_buckets * hash_table->bucket_size || *current_pos<0)
		{
			printf("out of bounds problem found in hash table_find_with_position\n");
			exit(1);
		}
		
		//reached an empty space: the key is not in this bucket and
		//*current_pos is the next available position
		if (element_check_for_flag_ALL_OFF(&hash_table->table[*current_pos]))
		{
			break;
		}
		
		//element found
		if (element_is_key(key,hash_table->table[*current_pos], hash_table->kmer_size))
		{
			found = true;
		}
		else
		{
			(*current_pos)++;
			i++;
		}
		
    }
	
	
	//every slot of the bucket was occupied by some other key
	if (i == hash_table->bucket_size)
    {
		*overflow = true;
    }
	
	
	assert(!found || !(*overflow));
	return found;
}
コード例 #2
0
ファイル: hash_table_ec.c プロジェクト: wtsi-svi/cortex
// Fast-path insert: places key in the first bucket (trying successive rehash
// values) whose next_element counter shows a free slot, and returns a pointer
// to the stored element.
// No check is made for an existing element with the same key, so this is only
// suitable for loading input whose keys are already known to be distinct.
// Calls die() when hash_table is NULL, or when the slot the bucket counter
// points at is not empty.
ElementEc * hash_table_ec_insert(Key key, HashTableEc * hash_table){

  if (hash_table == NULL) {
    die("NULL table!");
  }

  ElementEc fresh_element;
  int rehash = 0;

  for (;;) {
    // Hash a copy of the key with the rehash count added to its final bitfield.
    BinaryKmer probe_kmer;
    binary_kmer_initialise_to_zero(&probe_kmer);
    binary_kmer_assignment_operator(probe_kmer, *key);
    probe_kmer[NUMBER_OF_BITFIELDS_IN_BINARY_KMER-1] += (bitfield_of_64bits) rehash;

    uint32_t bucket = hash_value(&probe_kmer, hash_table->number_buckets);

    if (hash_table->next_element[bucket] >= hash_table->bucket_size) {
      // Bucket is full: try the next rehash value.
      rehash++;
      if (rehash > hash_table->max_rehash_tries) {
        // NOTE(review): the "too much rehashing" abort that used to live here is
        // disabled, so max_rehash_tries is NOT enforced and the loop keeps probing
        // until some bucket has room. Confirm this is intended.
      }
      continue;
    }

    // Index of the first free slot of this bucket within the flat table.
    long long slot_index = (long long) bucket * hash_table->bucket_size
                         + (long long) hash_table->next_element[bucket];

    // Sanity check: the slot the counter points at must still be unused.
    if (!db_node_ec_check_for_flag_ALL_OFF(&hash_table->table[slot_index])) {
      die("Out of bounds - trying to insert new node beyond end of bucket\n");
    }

    element_ec_initialise(&fresh_element, key, hash_table->kmer_size);
    element_ec_assign(&(hash_table->table[slot_index]), &fresh_element);
    hash_table->unique_kmers++;
    hash_table->next_element[bucket]++;

    return &hash_table->table[slot_index];
  }
}
コード例 #3
0
ファイル: element.c プロジェクト: richardmleggett/kontaminant
/*
 * Writes the key for `kmer` into `preallocated_key` and returns that same
 * pointer. The key is whichever of the kmer and its reverse complement
 * binary_kmer_less_than() reports as smaller; the kmer itself is used when
 * the reverse complement is not smaller.
 */
Key element_get_key(BinaryKmer * kmer, short kmer_size, Key preallocated_key)
{
	BinaryKmer rev_comp;
	BinaryKmer *chosen;

	binary_kmer_initialise_to_zero(&rev_comp);
	binary_kmer_reverse_complement(kmer, kmer_size, &rev_comp);

	/* Select the source once, then perform a single copy into the caller's buffer. */
	chosen = binary_kmer_less_than(rev_comp, *kmer, kmer_size) ? &rev_comp : kmer;

	binary_kmer_assignment_operator(*((BinaryKmer *) preallocated_key), *chosen);

	return preallocated_key;
}
コード例 #4
0
ファイル: element.c プロジェクト: richardmleggett/kontaminant
/*
 * Resets element `e` for `kmer`: stores the key obtained from
 * element_get_key() in e->kmer, zeroes every contaminant flag, marks the
 * element as ASSIGNED and, when STORE_FULL_COVERAGE is defined, zeroes both
 * coverage counters.
 */
void element_initialise(Element * e, BinaryKmer * kmer, short kmer_size)
{
	BinaryKmer key_storage;
	Key canonical_key;
	int field = 0;

	/* key_storage is scratch space that element_get_key() fills in and returns. */
	binary_kmer_initialise_to_zero(&key_storage);
	canonical_key = element_get_key(kmer, kmer_size, &key_storage);
	binary_kmer_assignment_operator(e->kmer, *canonical_key);

	while (field < CONTAMINANT_FIELDS) {
		e->contaminant_flags[field] = 0;
		field++;
	}

	e->flags = ASSIGNED;

#ifdef STORE_FULL_COVERAGE
	e->coverage[0] = 0;
	e->coverage[1] = 0;
#endif
}
コード例 #5
0
ファイル: test_hash.c プロジェクト: iqbal-lab/cortex
void test_hash_table_find_or_insert()
{

  short kmer_size;
  long long max_key_given_kmer_size;

      

  //adds binary kmers to a hash, then tries to find them
  //will go through all possible bin kmers given the kmer_size, stepping 
  // with granularity step. Since for kmer_size large (eg 21) it takes too long to try ALL
  // possible 21-mers, the last argument allows you to specify the largest one to test


  BinaryKmer tmp_kmer;
  BinaryKmer tmp_kmer2;
  
  void test(short kmer_size, int num_bits, int bucket, int max_tries, int step, long long max_kmer_to_test)
    {
      
      int number_of_bits      = num_bits;
      int bucket_size         = bucket;
      long long bad_reads     = 0; 
      int max_retries         = max_tries;
      boolean found           = false;
      
      HashTable* hash_table  = hash_table_new(number_of_bits,bucket_size,max_retries,kmer_size);
      
      
      long long i;
          

      for (i=0; i< max_kmer_to_test; i++)
	{

	  BinaryKmer b;
	  binary_kmer_initialise_to_zero(&b);
	  b[NUMBER_OF_BITFIELDS_IN_BINARY_KMER-1]=(bitfield_of_64bits) i;

	  hash_table_find_or_insert(element_get_key(&b, hash_table->kmer_size, &tmp_kmer),&found, hash_table);
	  if (found==false)
	    {
	      CU_ASSERT(binary_kmer_comparison_operator(b,*(element_get_key(&b, hash_table->kmer_size, &tmp_kmer)) ) ); 
	    }
	}
      
      Element* e=NULL;
      
      
      for (i=0; i< max_kmer_to_test; i=i+step)
	{

	  BinaryKmer b;
	  binary_kmer_initialise_to_zero(&b);
	  b[NUMBER_OF_BITFIELDS_IN_BINARY_KMER-1]=(bitfield_of_64bits) i;

	  e = hash_table_find(element_get_key(&b,hash_table->kmer_size,&tmp_kmer), hash_table);
	  CU_ASSERT(e!=NULL);
	  if (e !=NULL)
	    {
	      CU_ASSERT(binary_kmer_comparison_operator(e->kmer, *element_get_key(&b, hash_table->kmer_size, &tmp_kmer2)) );
	    }
	  else
	    {
        die("Error: e is NULL for i=%lld - unable to find\n",i);
	     }
	}
      
      hash_table_free(&hash_table);
      CU_ASSERT(hash_table == NULL);
      
    }