//search linear structure starting with the root of a tree static int startEdgeFromNode ( kmer_t * node1, FILE * fp ) { int node_c, palindrome; unsigned char flag; KMER_PT * ite_pt, *temp_pt; Kmer word1, bal_word1; char ch1; if ( node1->linear || node1->deleted ) { return 0; } // ignore floating loop word1 = node1->seq; bal_word1 = reverseComplement ( word1, overlaplen ); // linear structure for ( ch1 = 0; ch1 < 4; ch1++ ) // for every node on outgoing list { flag = get_kmer_right_cov ( *node1, ch1 ); if ( !flag ) { continue; } emptyStack ( nodeStack ); temp_pt = ( KMER_PT * ) stackPush ( nodeStack ); temp_pt->node = node1; temp_pt->isSmaller = 1; temp_pt->kmer = word1; stringBeads ( temp_pt, ch1, &node_c ); //printf("%d nodes\n",node_c); if ( node_c < 2 ) { printf ( "%d nodes in this line!!!!!!!!!!!\n", node_c ); } else { //make a reverse complement node list stackBackup ( nodeStack ); emptyStack ( bal_nodeStack ); while ( ( ite_pt = ( KMER_PT * ) stackPop ( nodeStack ) ) != NULL ) { temp_pt = ( KMER_PT * ) stackPush ( bal_nodeStack ); temp_pt->kmer = reverseComplement ( ite_pt->kmer, overlaplen ); } stackRecover ( nodeStack ); palindrome = check_iden_kmerList ( nodeStack, bal_nodeStack ); stackRecover ( nodeStack ); if ( palindrome ) { merge_linearV2 ( 0, nodeStack, node_c, fp ); } else { merge_linearV2 ( 1, nodeStack, node_c, fp ); } } } //every possible outgoing edges for ( ch1 = 0; ch1 < 4; ch1++ ) // for every node on incoming list { flag = get_kmer_left_cov ( *node1, ch1 ); if ( !flag ) { continue; } emptyStack ( nodeStack ); temp_pt = ( KMER_PT * ) stackPush ( nodeStack ); temp_pt->node = node1; temp_pt->isSmaller = 0; temp_pt->kmer = bal_word1; stringBeads ( temp_pt, int_comp ( ch1 ), &node_c ); if ( node_c < 2 ) { printf ( "%d nodes in this line!!!!!!!!!!!\n", node_c ); } else { //make a reverse complement node list stackBackup ( nodeStack ); emptyStack ( bal_nodeStack ); while ( ( ite_pt = ( KMER_PT * ) stackPop ( nodeStack ) ) != NULL ) { temp_pt = ( KMER_PT * ) stackPush ( bal_nodeStack ); temp_pt->kmer = reverseComplement ( ite_pt->kmer, overlaplen ); } stackRecover ( nodeStack ); palindrome = check_iden_kmerList ( nodeStack, bal_nodeStack ); stackRecover ( nodeStack ); if ( palindrome ) { merge_linearV2 ( 0, nodeStack, node_c, fp ); //printf("edge is palindrome with length %d\n",temp_edge.length); } else { merge_linearV2 ( 1, nodeStack, node_c, fp ); } } } //every possible incoming edges return 0; }
/************************************************* Function: startEdgeFromNode Description: Constructs edges from a branched node or end node. for every branch (left , right) 1. Puts the linear node into a stack 2. Checks the edge to be built form the stack are plalindrome or not 3. Builds an edge by merge the linear nodes Input: 1. ht: the graph hashtable 2. K_size: kmer size 3. fp: the file pointer for writing out edge sequences Output: None. Return: Zero. *************************************************/ static int startEdgeFromNode ( hashtable2 * ht, int K_size, bucket2 * node, FILE * fp ) { static size_t call_times; call_times++; if ( node->kmer_info.linear || node->kmer_info.deleted ) { return 0;//linear node ... } int left, right; left = count_left_edge_num ( node ); right = count_right_edge_num ( node ); if ( left == 0 && right == 0 ) { return 0; //it's a dead node } list<stacked_node2 *> stack; edge_node * t_edge = NULL, *t_next = NULL; stacked_node2 * t_stacked_node = NULL; vector<preEDGE2> loops_edges; int node_c; //for right edge t_edge = node->kmer_info.right; while ( t_edge ) { if ( t_edge->used == 1 ) { t_edge = t_edge->nxt_edge; continue; } t_stacked_node = ( stacked_node2 * ) malloc ( sizeof ( stacked_node2 ) ); t_stacked_node->node = node; t_stacked_node->is_left = 0; t_stacked_node->edge = t_edge; t_stacked_node->next = NULL; stack.push_back ( t_stacked_node ); t_edge->used = 1; stringBeads ( ht, K_size, stack, t_stacked_node, t_edge, &node_c ); process_1stack ( ht, K_size, stack, fp, loops_edges ); t_next = t_edge->nxt_edge;//because this procedure will remove the edge t_edge dislink ( ht, K_size, stack.front() ); if ( stack.size() > 2 ) { stack.pop_back();//change the stack if ( stack.back() && stack.size() > 1 ) //last but second node { dislink ( ht, K_size, stack.back() ); } } stacked_node2 * head, *tmp_node; head = stack.front(); while ( head ) { tmp_node = head; free ( tmp_node ); head = head->next; } stack.clear(); t_edge = t_next; } //for left edge t_edge = node->kmer_info.left; while ( t_edge ) { if ( t_edge->used == 1 ) { t_edge = t_edge->nxt_edge; continue; } t_stacked_node = ( stacked_node2 * ) malloc ( sizeof ( stacked_node2 ) ); t_stacked_node->node = node; t_stacked_node->is_left = 1; t_stacked_node->edge = t_edge; t_stacked_node->next = NULL; stack.push_back ( t_stacked_node ); t_edge->used = 1; stringBeads ( ht, K_size, stack, t_stacked_node, t_edge, &node_c ); // process_1stack ( ht, K_size, stack, fp, loops_edges ); t_next = t_edge->nxt_edge;//because this procedure will remove the edge t_edge dislink ( ht, K_size, stack.front() ); if ( stack.size() > 2 ) { stack.pop_back();//change the stack if ( stack.back() && stack.size() > 1 ) //last but second node { dislink ( ht, K_size, stack.back() ); } } //debug<<"before free stack"<<endl; stacked_node2 * head, *tmp_node; head = stack.front(); while ( head ) { tmp_node = head; free ( tmp_node ); head = head->next; } stack.clear(); t_edge = t_next; } if ( loops_edges.size() > 0 ) { //fprintf(stderr,"loops_edges size %llu\n",loops_edges.size()); int i, j, size; bool need_output; size = loops_edges.size(); need_output = 1; //bool debug = 0; for ( i = 0; i < size; i++ ) { string seq = * ( loops_edges[i].full_edge ); string rc_seq = revCompSeq ( seq ); /* if(seq.compare("AATTGGACGTGAGAGCAAATTGTATTGAGCATACAATTTGCTCTCACGTCCAATT") == 0) { fprintf(stderr,"in loops_edges %d %s\n",i,seq.c_str()); debug = 1; } if(seq.compare("AATTGGACGTGAGAGCAAATTGTATGCTCAATACAATTTGCTCTCACGTCCAATT") == 0) { fprintf(stderr,"in loops_edges %d %s\n",i,seq.c_str()); debug = 1; } if(debug ){ fprintf(stderr, "%d %s\n",i,seq.c_str()); fprintf(stderr, "%d %s\n",i,rc_seq.c_str()); }*/ for ( j = i + 1; j < size; j++ ) { string cur_seq = * ( loops_edges[j].full_edge ); if ( seq.compare ( cur_seq ) == 0 ) { fprintf ( stderr, "ERROR: two equal loop edge sequence from same node, this should not happen!\n" ); fprintf ( stderr, "%s\n", seq.c_str() ); exit ( -1 ); } if ( rc_seq.compare ( cur_seq ) == 0 ) { fprintf ( stderr, "INFO: two loop edge sequence are reversed complemental!\n" ); fprintf ( stderr, "%s\n", seq.c_str() ); fprintf ( stderr, "%s\n", rc_seq.c_str() ); need_output = 0; loops_edges[j].cvg += loops_edges[i].cvg; break; } } if ( need_output ) { output_1edge ( &loops_edges[i], K_size, fp ); //fprintf(stderr,"need output %d %s\n",i,seq.c_str()); } delete ( loops_edges[i].full_edge ); need_output = 1; } } return 0; }