示例#1
0
/*
 * Search the linear structures that start at node1.
 *
 * Nodes flagged `linear` or `deleted` are skipped.  Otherwise every
 * outgoing link with non-zero coverage is followed first, then every
 * incoming link (walked on the reverse-complement strand).  For each
 * link the chain gathered by stringBeads on nodeStack is compared with
 * its reverse-complement kmer list and passed to merge_linearV2 with
 * first argument 0 when check_iden_kmerList reports a match, 1 otherwise.
 *
 * node1: starting kmer node
 * fp:    stream forwarded to merge_linearV2
 *
 * Always returns 0.
 */
static int startEdgeFromNode ( kmer_t * node1, FILE * fp )
{
	int node_c, palindrome, incoming;
	unsigned char cov;
	KMER_PT * popped, *slot;
	Kmer word1, bal_word1;
	char ch1;

	if ( node1->linear || node1->deleted )
	{
		return 0;
	}

	word1 = node1->seq;
	bal_word1 = reverseComplement ( word1, overlaplen );

	// pass 0 walks the outgoing links, pass 1 the incoming ones
	for ( incoming = 0; incoming < 2; incoming++ )
	{
		for ( ch1 = 0; ch1 < 4; ch1++ )
		{
			if ( incoming )
			{
				cov = get_kmer_left_cov ( *node1, ch1 );
			}
			else
			{
				cov = get_kmer_right_cov ( *node1, ch1 );
			}

			if ( !cov )
			{
				continue;
			}

			// seed the stack with the starting node, oriented for this pass
			emptyStack ( nodeStack );
			slot = ( KMER_PT * ) stackPush ( nodeStack );
			slot->node = node1;

			if ( incoming )
			{
				slot->isSmaller = 0;
				slot->kmer = bal_word1;
				stringBeads ( slot, int_comp ( ch1 ), &node_c );
			}
			else
			{
				slot->isSmaller = 1;
				slot->kmer = word1;
				stringBeads ( slot, ch1, &node_c );
			}

			if ( node_c < 2 )
			{
				printf ( "%d nodes in this line!!!!!!!!!!!\n", node_c );
				continue;
			}

			// build the reverse-complement kmer list on bal_nodeStack
			stackBackup ( nodeStack );
			emptyStack ( bal_nodeStack );

			for ( popped = ( KMER_PT * ) stackPop ( nodeStack );
			        popped != NULL;
			        popped = ( KMER_PT * ) stackPop ( nodeStack ) )
			{
				slot = ( KMER_PT * ) stackPush ( bal_nodeStack );
				slot->kmer = reverseComplement ( popped->kmer, overlaplen );
			}

			stackRecover ( nodeStack );
			palindrome = check_iden_kmerList ( nodeStack, bal_nodeStack );
			stackRecover ( nodeStack );

			if ( !palindrome )
			{
				merge_linearV2 ( 1, nodeStack, node_c, fp );
			}
			else
			{
				merge_linearV2 ( 0, nodeStack, node_c, fp );
			}
		}
	}

	return 0;
}
示例#2
0
/*************************************************
Function:
    startEdgeFromNode
Description:
    Constructs edges from a branched node or end node.
    Nodes flagged linear or deleted, and nodes with no left and no
    right edge, are skipped.
    For every unused branch edge (right edges first, then left edges):
    1. Puts the linear nodes into a stack
    2. Checks whether the edge to be built from the stack is a palindrome
    3. Builds an edge by merging the linear nodes
    Loop edges collected while processing the stacks are written out at
    the end; when one loop edge is the reverse complement of a later
    one, its coverage is added to the later edge and only the later
    edge is written.
Input:
    1. ht:      the graph hashtable
    2. K_size:  kmer size
    3. node:    the node the edges start from
    4. fp:      the file pointer for writing out edge sequences
Output:
    None.
Return:
    Zero.
*************************************************/
static int startEdgeFromNode ( hashtable2 * ht, int K_size, bucket2 * node, FILE * fp )
{
	if ( node->kmer_info.linear || node->kmer_info.deleted )
	{
		return 0;//linear node ...
	}

	int left, right;
	left = count_left_edge_num ( node );
	right = count_right_edge_num ( node );

	if ( left == 0 && right == 0 )
	{
		return 0; //it's a dead node
	}

	list<stacked_node2 *> stack;
	edge_node * t_edge = NULL, *t_next = NULL;
	stacked_node2 * t_stacked_node = NULL;
	vector<preEDGE2> loops_edges;
	int node_c;

	//side 0: right edges, side 1: left edges (the left list is read only
	//after all right edges have been processed, as before)
	for ( int side = 0; side < 2; side++ )
	{
		t_edge = side ? node->kmer_info.left : node->kmer_info.right;

		while ( t_edge )
		{
			if ( t_edge->used == 1 )
			{
				t_edge = t_edge->nxt_edge;
				continue;
			}

			t_stacked_node = ( stacked_node2 * ) malloc ( sizeof ( stacked_node2 ) );

			if ( t_stacked_node == NULL )
			{
				fprintf ( stderr, "ERROR: out of memory while building an edge stack!\n" );
				exit ( -1 );
			}

			t_stacked_node->node = node;
			t_stacked_node->is_left = side;
			t_stacked_node->edge = t_edge;
			t_stacked_node->next = NULL;
			stack.push_back ( t_stacked_node );
			t_edge->used = 1;
			stringBeads ( ht, K_size, stack, t_stacked_node, t_edge, &node_c );
			process_1stack ( ht, K_size, stack, fp, loops_edges );
			t_next = t_edge->nxt_edge;//because this procedure will remove the edge t_edge
			dislink ( ht, K_size, stack.front() );

			if ( stack.size() > 2 )
			{
				stack.pop_back();//change the stack

				if ( stack.back() && stack.size() > 1 ) //last but second node
				{
					dislink ( ht, K_size, stack.back() );
				}
			}

			//free the stacked nodes by following their `next` chain from the
			//front node; the successor is saved BEFORE the node is freed so
			//that freed memory is never read
			stacked_node2 * head = stack.front();

			while ( head )
			{
				stacked_node2 * following = head->next;
				free ( head );
				head = following;
			}

			stack.clear();
			t_edge = t_next;
		}
	}

	//write out the loop edges collected above
	size_t size = loops_edges.size();

	for ( size_t i = 0; i < size; i++ )
	{
		const string & seq = * ( loops_edges[i].full_edge );
		string rc_seq = revCompSeq ( seq );
		bool need_output = true;

		for ( size_t j = i + 1; j < size; j++ )
		{
			const string & cur_seq = * ( loops_edges[j].full_edge );

			if ( seq.compare ( cur_seq ) == 0 )
			{
				fprintf ( stderr, "ERROR: two equal loop edge sequence from same node, this should not happen!\n" );
				fprintf ( stderr, "%s\n", seq.c_str() );
				exit ( -1 );
			}

			if ( rc_seq.compare ( cur_seq ) == 0 )
			{
				fprintf ( stderr, "INFO: two loop edge sequence are reversed complemental!\n" );
				fprintf ( stderr, "%s\n", seq.c_str() );
				fprintf ( stderr, "%s\n", rc_seq.c_str() );
				//edge j will be written later and carries the coverage of edge i
				need_output = false;
				loops_edges[j].cvg += loops_edges[i].cvg;
				break;
			}
		}

		if ( need_output )
		{
			output_1edge ( &loops_edges[i], K_size, fp );
		}

		//seq refers to this string; it is not used past this point
		delete ( loops_edges[i].full_edge );
	}

	return 0;
}