// Example no. 1
// 0
/*************************************************
Function:
    lastKmer
Description:
    Builds the kmer reached by following one kmer-edge away from a node
    and looks that kmer up in the graph hashtable.
    The node's kmer is shifted by (edge->len + 1) positions, the edge bits
    are merged in, and the result is masked with the K_size filter. The
    lookup uses whichever of the resulting kmer and its reverse complement
    does not compare greater than the other.
Input:
    1. ht:          the graph hashtable
    2. K_size:      kmer size
    3. node:        the node whose kmer-edge is followed
    4. edge:        the kmer-edge
    5. is_left:     non-zero if the kmer-edge is on the node's left side
Output:
    1. smaller:     set to 0 when the reverse complement was used for the
                    lookup, 1 otherwise. Left untouched when node or edge
                    is NULL.
Return:
    The result of search_kmer() for the selected kmer (presumably NULL
    when no such node exists).
    NULL if node or edge is NULL.
Note:
    Terminates the process with exit(-1) when edge->len + 1 > K_size.
*************************************************/
static bucket2 * lastKmer ( hashtable2 * ht, int K_size, bucket2 * node, edge_node * edge, int is_left, int & smaller ) //NEW
{
	if ( node == NULL || edge == NULL )
	{
		return NULL;
	}

	// Number of positions the node kmer is shifted by.
	const int shift = edge->len + 1;

	if ( shift > K_size )
	{
		fprintf ( stderr, "ERROR: g value should be no great than kmer size!\n" );
		exit ( -1 );
	}

	// Edge bits go into the last word of an otherwise all-zero kmer.
	kmer_t2 edge_bits;
	memset ( edge_bits.kmer, 0, sizeof ( edge_bits ) );
	edge_bits.kmer[sizeof ( edge_bits ) / sizeof ( uint64_t ) - 1] = edge->edge;

	kmer_t2 mask;
	initKmerFilter ( K_size, &mask );

	kmer_t2 target = node->kmer_t2;

	if ( is_left )
	{
		// Left edge: shift the node kmer right and move the edge bits
		// up so that they occupy the vacated high positions.
		kmerMoveRight ( &target, shift );
		kmerMoveLeft ( &edge_bits, K_size - shift );
	}
	else
	{
		// Right edge: shift the node kmer left; the edge bits are
		// merged in unshifted.
		kmerMoveLeft ( &target, shift );
	}

	kmerOr ( &target, &edge_bits );
	kmerAnd ( &target, &mask );

	kmer_t2 rev_comp = target;
	reverseCompKmer ( &rev_comp, K_size );

	if ( kmerCompare ( &target, &rev_comp ) > 0 )
	{
		// The reverse complement compares lower, so look that one up.
		smaller = 0;
		return search_kmer ( ht, &rev_comp );
	}

	smaller = 1;
	return search_kmer ( ht, &target );
}
// Example no. 2
// 0
/*************************************************
Function:
    stack2string
Description:
    Converts a stack of nodes into the sequence string it spells.
    The string starts with the kmer of the first stack entry's node
    (reverse complemented when that entry's is_left flag is set). Then,
    following the entries' `next` links from the first entry onwards, the
    bases of every entry that carries an edge are appended; an entry with
    is_left set contributes the reverse complement of its edge.
Input:
    1. ht:          the graph hashtable (not used by this function)
    2. K_size:      kmer size
    3. stack:       the stack of nodes; only its front element is read
                    directly, the rest is reached through `next`
Output:
    None.
Return:
    The assembled sequence.
    An empty string if the stack is empty or its front entry is NULL.
*************************************************/
static string stack2string ( hashtable2 * ht, int K_size, list<stacked_node2 *> & stack )
{
	string full_edge;

	// front() on an empty list is undefined behaviour, so bail out early.
	if ( stack.empty() )
	{
		return full_edge;
	}

	stacked_node2 * t_stack_node = stack.front();

	if ( !t_stack_node )
	{
		return full_edge;
	}

	// NOTE(review): assumes bitsarr2str writes a NUL-terminated string that
	// fits into 1024 bytes for the given K_size -- confirm against its definition.
	char tmp[1024];
	uint64_t bits[2];
	kmer_t2 tmp_kmer = ( t_stack_node->node->kmer_t2 );

	if ( t_stack_node->is_left )
	{
		reverseCompKmer ( &tmp_kmer, K_size );
	}

	bitsarr2str ( tmp_kmer.kmer, K_size, tmp, sizeof ( kmer_t2 ) / sizeof ( uint64_t ) );
	full_edge.append ( tmp ); //put first node

	for ( ; t_stack_node; t_stack_node = t_stack_node->next )
	{
		if ( !t_stack_node->edge )
		{
			continue;
		}

		const int edge_len = t_stack_node->edge->len + 1;

		if ( t_stack_node->is_left )
		{
			bits[0] = get_rev_comp_seq ( t_stack_node->edge->edge, edge_len );
		}
		else
		{
			bits[0] = t_stack_node->edge->edge;
		}

		bitsarr2str ( bits, edge_len, tmp, 1 );
		full_edge.append ( tmp );
	}

	return full_edge;
}
/*************************************************
Function:
    process_edge
Description:
    It builds vetexes  from one or part of one edge sequence.
Input:
    1. v_ht:        hashtable
    2. K_size:      kmer size
    3. seq:     edge sequence
    4. len:     edge length
    5. type:        1: process head and tail;  2: process head ; 3:process tail
    6. edge_id:     edge id
    7. bal_edge:        0:palindrome 1:else
Output:
    None.
Return:
    None.
*************************************************/
static void process_edge ( vertex_hash2 * v_ht, int K_size, char * seq, int len, int type, size_t edge_id, bool bal_edge )
{
	kmer_t2 vertex_kmer;
	kmer_t2 edge_kmer;
	vertex2 * v_tmp;
	edge_starter2 * e_tmp;
	int is_found;
	bool is_left;
	int edge_kmer_len;

	switch ( type )
	{
		case 1: //process all ..
			//process the head
			get_kmer_from_seq ( seq, len, K_size, 0, &vertex_kmer );

			if ( len <= K_size + gap ) //get the last kmer
			{
				get_kmer_from_seq ( seq, len, K_size, len - K_size, &edge_kmer );
				edge_kmer_len = len - K_size;
			}
			else
			{
				//get_kmer_from_seq(seq, len, K_size, K_size,&edge_kmer);
				get_kmer_from_seq ( seq, len, K_size, gap, &edge_kmer );
				edge_kmer_len = gap;
			}

			is_left = 0;//right
			v_tmp = put_vertex ( v_ht, vertex_kmer, is_found );
			put_edge ( v_tmp, edge_kmer,  is_left, edge_kmer_len, edge_id );
			reverseCompKmer ( &vertex_kmer, K_size );
			reverseCompKmer ( &edge_kmer, K_size );
			is_left = 1;//left
			v_tmp = put_vertex ( v_ht, vertex_kmer, is_found );
			put_edge ( v_tmp, edge_kmer,  is_left, edge_kmer_len, edge_id + bal_edge );
			//process the tail
			get_kmer_from_seq ( seq, len, K_size, len - K_size, &vertex_kmer );

			if ( len <= K_size + gap ) //get the first kmer
			{
				get_kmer_from_seq ( seq, len, K_size, 0, &edge_kmer );
				edge_kmer_len = len - K_size;
			}
			else
			{
				get_kmer_from_seq ( seq, len, K_size, len - K_size - gap, &edge_kmer );
				edge_kmer_len = gap;
			}

			is_left = 1;
			v_tmp = put_vertex ( v_ht, vertex_kmer, is_found );
			put_edge ( v_tmp, edge_kmer,  is_left, edge_kmer_len, edge_id );
			reverseCompKmer ( &vertex_kmer, K_size );
			reverseCompKmer ( &edge_kmer, K_size );
			is_left = 0;//right
			v_tmp = put_vertex ( v_ht, vertex_kmer, is_found );
			put_edge ( v_tmp, edge_kmer,  is_left, edge_kmer_len, edge_id + bal_edge );
			break;
		case 2:
			//process only the  head
			get_kmer_from_seq ( seq, len, K_size, 0, &vertex_kmer );

			if ( len <= K_size + gap )
			{
				get_kmer_from_seq ( seq, len, K_size, len - K_size, &edge_kmer );
				edge_kmer_len = len - K_size;
			}
			else
			{
				get_kmer_from_seq ( seq, len, K_size, gap, &edge_kmer );
				edge_kmer_len = gap;
			}

			is_left = 0;//right
			v_tmp = put_vertex ( v_ht, vertex_kmer, is_found );
			put_edge ( v_tmp, edge_kmer,  is_left, edge_kmer_len, edge_id );
			reverseCompKmer ( &vertex_kmer, K_size );
			reverseCompKmer ( &edge_kmer, K_size );
			is_left = 1;//left
			v_tmp = put_vertex ( v_ht, vertex_kmer, is_found );
			put_edge ( v_tmp, edge_kmer,  is_left, edge_kmer_len, edge_id + bal_edge );
			break;
		case 3:
			//process only the tail
			get_kmer_from_seq ( seq, len, K_size, len - K_size, &vertex_kmer );

			if ( len <= K_size + gap )
			{
				get_kmer_from_seq ( seq, len, K_size, 0, &edge_kmer );
				edge_kmer_len = len - K_size;
			}
			else
			{
				get_kmer_from_seq ( seq, len, K_size, len - K_size - gap, &edge_kmer );
				edge_kmer_len = gap;
			}

			is_left = 1;
			v_tmp = put_vertex ( v_ht, vertex_kmer, is_found );
			put_edge ( v_tmp, edge_kmer,  is_left, edge_kmer_len, edge_id );
			reverseCompKmer ( &vertex_kmer, K_size );
			reverseCompKmer ( &edge_kmer, K_size );
			is_left = 0;//right
			v_tmp = put_vertex ( v_ht, vertex_kmer, is_found );
			put_edge ( v_tmp, edge_kmer,  is_left, edge_kmer_len, edge_id + bal_edge );
			break;
		default:
			fprintf ( stderr, "ERROR: wrong process type in process_edge()\n" );
			exit ( 1 );
	}
}