示例#1
0
// Look up `key` in the bucket selected by hashing (key + rehash).
//
// The rehash count is added to the last bitfield of a private copy of the key
// before hashing, so different rehash values can select different buckets.
//
// Outcomes:
//   - key present:                  returns true;  *current_pos is the slot holding it, *overflow is false.
//   - key absent, bucket has room:  returns false; *current_pos is the first slot in the bucket whose
//                                   flags are all off, *overflow is false.
//   - key absent, bucket full:      returns false; *overflow is true and *current_pos is one past the
//                                   last slot of the bucket.
//
// Prints a message and calls exit(1) if a probed position lies outside the table.
boolean hash_table_find_in_bucket(Key key, long long * current_pos, boolean * overflow, HashTable * hash_table, int rehash){

	//add the rehash to the final bitfield in the BinaryKmer
	BinaryKmer bkmer_with_rehash_added;
	binary_kmer_initialise_to_zero(&bkmer_with_rehash_added);
	binary_kmer_assignment_operator(bkmer_with_rehash_added, *key);
	bkmer_with_rehash_added[NUMBER_OF_BITFIELDS_IN_BINARY_KMER-1] += (bitfield_of_64bits) rehash;

	int hashval = hash_value(&bkmer_with_rehash_added,hash_table->number_buckets);

	// Total number of slots, computed in long long so the product cannot
	// overflow a narrower integer type.
	const long long table_size = (long long) hash_table->number_buckets * hash_table->bucket_size;

	boolean found = false;
	int i = 0;                   //position in bucket
	*overflow    = false;
	*current_pos = (long long) hashval * hash_table->bucket_size;   //position in hash table

	while (i < hash_table->bucket_size && !found)   // still within the bucket, key not yet seen
	{
		// Sanity check: validate the position BEFORE touching the table, so a bad
		// hash value is reported instead of causing an out-of-bounds read.
		if (*current_pos < 0 || *current_pos >= table_size)
		{
			printf("out of bounds problem found in hash table_find_with_position\n");
			exit(1);
		}

		// Reached an empty slot: the key is not in this bucket and
		// *current_pos is where it could be stored.
		if (element_check_for_flag_ALL_OFF(&hash_table->table[*current_pos]))
		{
			break;
		}

		if (element_is_key(key,hash_table->table[*current_pos], hash_table->kmer_size))
		{
			found = true;
		}
		else
		{
			(*current_pos)++;
			i++;
		}
	}

	// Every slot of the bucket was occupied by some other key.
	if (i == hash_table->bucket_size)
	{
		*overflow = true;
	}

	assert(!found || !(*overflow));
	return found;
}
示例#2
0
// Write the key for `kmer` into `preallocated_key` and return that same pointer.
//
// The key is whichever of the kmer and its reverse complement
// binary_kmer_less_than() ranks lower; when the reverse complement is not
// strictly lower, the kmer itself is used.
Key element_get_key(BinaryKmer * kmer, short kmer_size, Key preallocated_key)
{
	BinaryKmer revcomp;
	binary_kmer_initialise_to_zero(&revcomp);
	binary_kmer_reverse_complement(kmer, kmer_size, &revcomp);

	// Select the source once, then perform a single copy into the caller's buffer.
	BinaryKmer *chosen = kmer;
	if (binary_kmer_less_than(revcomp, *kmer, kmer_size)) {
		chosen = &revcomp;
	}

	binary_kmer_assignment_operator(*((BinaryKmer *) preallocated_key), *chosen);

	return preallocated_key;
}
示例#3
0
// Write the binary kmer of `node` to `fp` as NUMBER_OF_BITFIELDS_IN_BINARY_KMER
// consecutive bitfield_of_64bits values. Nothing else about the node is written.
// `kmer_size` is accepted but not used here.
void db_node_print_binary(FILE * fp, Element* node, int kmer_size)
{
	// Write from a local copy of the node's kmer.
	BinaryKmer kmer_copy;
	binary_kmer_assignment_operator(kmer_copy, *element_get_kmer(node));

	const size_t word_size = sizeof(bitfield_of_64bits);
	fwrite(&kmer_copy, word_size, NUMBER_OF_BITFIELDS_IN_BINARY_KMER, fp);
}
示例#4
0
文件: dB_graph.c 项目: goshng/cocoa
// Follow `edge` out of `current_node` (read in `current_orientation`) and
// return the node it leads to, or NULL if that node is not in the graph.
//
// Outputs:
//   *reverse_edge     - always set: the last nucleotide of the current kmer when
//                       the orientation is `reverse`, otherwise the last
//                       nucleotide of its reverse complement.
//   *next_orientation - set only when a node is found.
//
// A warning naming the missing kmer is emitted when the lookup fails.
dBNode * db_graph_get_next_node(dBNode * current_node, Orientation current_orientation,
                                Orientation * next_orientation,
                                Nucleotide edge, Nucleotide * reverse_edge,dBGraph * db_graph){

    BinaryKmer working_kmer;
    BinaryKmer scratch_kmer;   // holds the reverse complement first, the lookup key later

    binary_kmer_assignment_operator(working_kmer, current_node->kmer);

    BinaryKmer* revcomp = binary_kmer_reverse_complement(&working_kmer, db_graph->kmer_size, &scratch_kmer);

    if (current_orientation != reverse){
        *reverse_edge = binary_kmer_get_last_nucleotide(revcomp);
    }
    else{
        // Read the edge from the forward kmer before it is overwritten.
        *reverse_edge = binary_kmer_get_last_nucleotide(&working_kmer);
        binary_kmer_assignment_operator(working_kmer, *revcomp);
    }

    // Advance one base along the requested edge.
    binary_kmer_left_shift_one_base_and_insert_new_base_at_right_end(&working_kmer, edge, db_graph->kmer_size);

    // Look the new kmer up by its key; the key is written into scratch_kmer.
    dBNode * target = hash_table_find(element_get_key(&working_kmer, db_graph->kmer_size, &scratch_kmer), db_graph);

    if (target == NULL){
        // debug
        char seq_buffer[db_graph->kmer_size+1];
        warn("Cannot find %s so get a NULL node\n", binary_kmer_to_seq(&scratch_kmer, db_graph->kmer_size, seq_buffer));
        return target;
    }

    *next_orientation = db_node_get_orientation(&working_kmer, target, db_graph->kmer_size);
    return target;
}
示例#5
0
// Insert a new element for `key` into the next available slot of its bucket,
// rehashing to another bucket whenever the current one is full.
//
// It does not check whether an element with the same key is already present:
// it is meant for fast loading when all input keys are known to be distinct.
//
// Returns a pointer to the stored element inside hash_table->table.
//
// Fatal errors (via die(), presumed not to return):
//   - hash_table is NULL;
//   - the slot recorded as next-free for a bucket is not actually empty;
//   - more than hash_table->max_rehash_tries rehashes were needed. Without this
//     stop, a table with no free slot would loop forever and eventually
//     overflow the signed rehash counter.
ElementEc * hash_table_ec_insert(Key key, HashTableEc * hash_table){
  
  if (hash_table == NULL) {
    die("NULL table!");
  }
  
  ElementEc element;
  ElementEc * ret = NULL;
  int rehash = 0;
  boolean inserted = false;
  do{
    //add the rehash to the final bitfield in the BinaryKmer
    BinaryKmer bkmer_with_rehash_added;
    binary_kmer_initialise_to_zero(&bkmer_with_rehash_added);
    binary_kmer_assignment_operator(bkmer_with_rehash_added, *key);
    bkmer_with_rehash_added[NUMBER_OF_BITFIELDS_IN_BINARY_KMER-1] =   bkmer_with_rehash_added[NUMBER_OF_BITFIELDS_IN_BINARY_KMER-1]+ (bitfield_of_64bits) rehash;

    uint32_t hashval = hash_value(&bkmer_with_rehash_added,hash_table->number_buckets);
    
    if (hash_table->next_element[hashval] < hash_table->bucket_size)
      { //can insert element
	long long  current_pos   = (long long) hashval * hash_table->bucket_size + (long long) hash_table->next_element[hashval] ;   //position in hash table

	//sanity check: the slot about to be written must still be empty
	if (!db_node_ec_check_for_flag_ALL_OFF(&hash_table->table[current_pos])){
	  die("Out of bounds - trying to insert new node beyond end of bucket\n");
	}
  
      
	element_ec_initialise(&element,key, hash_table->kmer_size);
	element_ec_assign( &(hash_table->table[current_pos]),  &element); 
	hash_table->unique_kmers++;
	hash_table->next_element[hashval]++;	
	ret = &hash_table->table[current_pos];
	inserted=true;
      }
    else
      {//bucket full: try the next rehash, giving up once the limit is exceeded
	rehash++;
	if (rehash>hash_table->max_rehash_tries)
	  {
	    die("Dear user - you have not allocated enough memory to contain your sequence data. Either allocate more memory (have you done your calculations right? have you allowed for sequencing errors?), or threshold more harshly on quality score, and try again. Aborting mission.\n");
	  }
      }
    
  } while (! inserted);

  return ret;
}
示例#6
0
// Copy every stored field of *e2 into *e1: the kmer, all CONTAMINANT_FIELDS
// contaminant flags, both coverage counters (only when STORE_FULL_COVERAGE is
// defined) and the flags.
void element_assign(Element * e1, Element * e2)
{
    binary_kmer_assignment_operator(e1->kmer, e2->kmer);

    for (int field = 0; field < CONTAMINANT_FIELDS; ++field) {
        e1->contaminant_flags[field] = e2->contaminant_flags[field];
    }

#ifdef STORE_FULL_COVERAGE
    e1->coverage[0] = e2->coverage[0];
    e1->coverage[1] = e2->coverage[1];
#endif

    e1->flags = e2->flags;
}
示例#7
0
// Set up *e as a freshly assigned element for `kmer`.
//
// The stored kmer is the key produced by element_get_key() for `kmer`, all
// contaminant flags are cleared, flags is set to ASSIGNED and, when
// STORE_FULL_COVERAGE is defined, both coverage counters are zeroed.
void element_initialise(Element * e, BinaryKmer * kmer, short kmer_size)
{
    // Scratch buffer that element_get_key() fills with the key.
    BinaryKmer key_buffer;
    binary_kmer_initialise_to_zero(&key_buffer);

    binary_kmer_assignment_operator(e->kmer, *(element_get_key(kmer, kmer_size, &key_buffer)));

    for (int field = 0; field < CONTAMINANT_FIELDS; ++field) {
        e->contaminant_flags[field] = 0;
    }

    e->flags = ASSIGNED;

#ifdef STORE_FULL_COVERAGE
    e->coverage[0] = 0;
    e->coverage[1] = 0;
#endif
}