/*************************************************
 Function:
    lastKmer
 Description:
    Builds the kmer that a node's kmer-edge ends with and looks it up
    in the graph hashtable. The node's kmer is shifted by the edge
    length, the edge bits are OR-ed in, and the result is masked with
    the filter produced by initKmerFilter(). The lookup uses whichever
    of the resulting kmer and its reverse complement does not compare
    greater according to kmerCompare().
 Input:
    1. ht:          the graph hashtable
    2. K_size:      kmer size
    3. node:        the node whose kmer-edge will be followed
    4. edge:        the kmer-edge
    5. is_left:     non-zero when the kmer-edge is on the node's left side
 Output:
    1. smaller:     1 when the built kmer itself was used for the lookup,
                    0 when its reverse complement was used instead
 Return:
    The value returned by search_kmer() for the chosen kmer.
    NULL when node or edge is NULL.
    The process exits with -1 when edge->len + 1 exceeds K_size.
*************************************************/
static bucket2 * lastKmer ( hashtable2 * ht, int K_size, bucket2 * node, edge_node * edge, int is_left, int & smaller )
{
	if ( node == NULL || edge == NULL )
	{
		return NULL;
	}

	// The stored length is one less than the length used for shifting.
	int shift_len = edge->len + 1;

	if ( shift_len > K_size )
	{
		fprintf ( stderr, "ERROR: g value should be no great than kmer size!\n" );
		exit ( -1 );
	}

	// Place the edge bits in the last 64-bit word of an otherwise zeroed kmer.
	kmer_t2 edge_bits;
	memset ( edge_bits.kmer, 0, sizeof ( edge_bits ) );
	( edge_bits.kmer ) [sizeof ( edge_bits ) / sizeof ( uint64_t ) - 1] = edge->edge;

	kmer_t2 mask;
	initKmerFilter ( K_size, &mask );

	kmer_t2 target = node->kmer_t2;

	if ( is_left )
	{
		// Left edge: shift the node kmer right and move the edge bits
		// up so that they fill the positions that were vacated.
		kmerMoveRight ( &target, shift_len );
		kmerMoveLeft ( &edge_bits, K_size - shift_len );
	}
	else
	{
		// Right edge: shift the node kmer left; the edge bits already
		// sit in the low positions.
		kmerMoveLeft ( &target, shift_len );
	}

	// Both orientations finish the same way: merge, then mask.
	kmerOr ( &target, &edge_bits );
	kmerAnd ( &target, &mask );

	kmer_t2 rev_comp = target;
	reverseCompKmer ( &rev_comp, K_size );

	if ( kmerCompare ( &target, &rev_comp ) > 0 )
	{
		smaller = 0;
		target = rev_comp;
	}
	else
	{
		smaller = 1;
	}

	return search_kmer ( ht, &target );
}
/*************************************************
 Function:
    stack2string
 Description:
    Converts a chain of stacked nodes into one sequence string.
    The string starts with the kmer of the first stacked node
    (reverse-complemented when that node is marked is_left) and is
    followed by the edge sequence of every stacked node reached
    through the `next` links that has a non-NULL edge. Edges of
    nodes marked is_left are reverse-complemented before being
    appended.
 Input:
    1. ht:          the graph hashtable (not used by this function)
    2. K_size:      kmer size
    3. stack:       the list whose front element starts the chain
 Output:
    None.
 Return:
    The assembled sequence. An empty string when the list is empty
    or its front element is NULL.
*************************************************/
static string stack2string ( hashtable2 * ht, int K_size, list<stacked_node2 *> & stack )
{
	string full_edge;

	// front() on an empty list is undefined behaviour, so bail out first.
	if ( stack.empty() || stack.front() == NULL )
	{
		return full_edge;
	}

	stacked_node2 * t_stack_node = stack.front();
	char tmp[1024];
	uint64_t bits[2] = { 0, 0 };

	// Put the first node's kmer.
	kmer_t2 tmp_kmer = ( t_stack_node->node->kmer_t2 );

	if ( t_stack_node->is_left )
	{
		reverseCompKmer ( &tmp_kmer, K_size );
	}

	bitsarr2str ( tmp_kmer.kmer, K_size, tmp, sizeof ( kmer_t2 ) / sizeof ( uint64_t ) );
	full_edge.append ( tmp );

	// Append the edge of every stacked node, starting with the first one.
	for ( ; t_stack_node; t_stack_node = t_stack_node->next )
	{
		if ( !t_stack_node->edge )
		{
			continue;
		}

		if ( t_stack_node->is_left )
		{
			bits[0] = get_rev_comp_seq ( t_stack_node->edge->edge, t_stack_node->edge->len + 1 );
		}
		else
		{
			bits[0] = t_stack_node->edge->edge;
		}

		bitsarr2str ( bits, t_stack_node->edge->len + 1, tmp, 1 );
		full_edge.append ( tmp );
	}

	return full_edge;
}
/************************************************* Function: process_edge Description: It builds vetexes from one or part of one edge sequence. Input: 1. v_ht: hashtable 2. K_size: kmer size 3. seq: edge sequence 4. len: edge length 5. type: 1: process head and tail; 2: process head ; 3:process tail 6. edge_id: edge id 7. bal_edge: 0:palindrome 1:else Output: None. Return: None. *************************************************/ static void process_edge ( vertex_hash2 * v_ht, int K_size, char * seq, int len, int type, size_t edge_id, bool bal_edge ) { kmer_t2 vertex_kmer; kmer_t2 edge_kmer; vertex2 * v_tmp; edge_starter2 * e_tmp; int is_found; bool is_left; int edge_kmer_len; switch ( type ) { case 1: //process all .. //process the head get_kmer_from_seq ( seq, len, K_size, 0, &vertex_kmer ); if ( len <= K_size + gap ) //get the last kmer { get_kmer_from_seq ( seq, len, K_size, len - K_size, &edge_kmer ); edge_kmer_len = len - K_size; } else { //get_kmer_from_seq(seq, len, K_size, K_size,&edge_kmer); get_kmer_from_seq ( seq, len, K_size, gap, &edge_kmer ); edge_kmer_len = gap; } is_left = 0;//right v_tmp = put_vertex ( v_ht, vertex_kmer, is_found ); put_edge ( v_tmp, edge_kmer, is_left, edge_kmer_len, edge_id ); reverseCompKmer ( &vertex_kmer, K_size ); reverseCompKmer ( &edge_kmer, K_size ); is_left = 1;//left v_tmp = put_vertex ( v_ht, vertex_kmer, is_found ); put_edge ( v_tmp, edge_kmer, is_left, edge_kmer_len, edge_id + bal_edge ); //process the tail get_kmer_from_seq ( seq, len, K_size, len - K_size, &vertex_kmer ); if ( len <= K_size + gap ) //get the first kmer { get_kmer_from_seq ( seq, len, K_size, 0, &edge_kmer ); edge_kmer_len = len - K_size; } else { get_kmer_from_seq ( seq, len, K_size, len - K_size - gap, &edge_kmer ); edge_kmer_len = gap; } is_left = 1; v_tmp = put_vertex ( v_ht, vertex_kmer, is_found ); put_edge ( v_tmp, edge_kmer, is_left, edge_kmer_len, edge_id ); reverseCompKmer ( &vertex_kmer, K_size ); reverseCompKmer ( &edge_kmer, K_size ); 
is_left = 0;//right v_tmp = put_vertex ( v_ht, vertex_kmer, is_found ); put_edge ( v_tmp, edge_kmer, is_left, edge_kmer_len, edge_id + bal_edge ); break; case 2: //process only the head get_kmer_from_seq ( seq, len, K_size, 0, &vertex_kmer ); if ( len <= K_size + gap ) { get_kmer_from_seq ( seq, len, K_size, len - K_size, &edge_kmer ); edge_kmer_len = len - K_size; } else { get_kmer_from_seq ( seq, len, K_size, gap, &edge_kmer ); edge_kmer_len = gap; } is_left = 0;//right v_tmp = put_vertex ( v_ht, vertex_kmer, is_found ); put_edge ( v_tmp, edge_kmer, is_left, edge_kmer_len, edge_id ); reverseCompKmer ( &vertex_kmer, K_size ); reverseCompKmer ( &edge_kmer, K_size ); is_left = 1;//left v_tmp = put_vertex ( v_ht, vertex_kmer, is_found ); put_edge ( v_tmp, edge_kmer, is_left, edge_kmer_len, edge_id + bal_edge ); break; case 3: //process only the tail get_kmer_from_seq ( seq, len, K_size, len - K_size, &vertex_kmer ); if ( len <= K_size + gap ) { get_kmer_from_seq ( seq, len, K_size, 0, &edge_kmer ); edge_kmer_len = len - K_size; } else { get_kmer_from_seq ( seq, len, K_size, len - K_size - gap, &edge_kmer ); edge_kmer_len = gap; } is_left = 1; v_tmp = put_vertex ( v_ht, vertex_kmer, is_found ); put_edge ( v_tmp, edge_kmer, is_left, edge_kmer_len, edge_id ); reverseCompKmer ( &vertex_kmer, K_size ); reverseCompKmer ( &edge_kmer, K_size ); is_left = 0;//right v_tmp = put_vertex ( v_ht, vertex_kmer, is_found ); put_edge ( v_tmp, edge_kmer, is_left, edge_kmer_len, edge_id + bal_edge ); break; default: fprintf ( stderr, "ERROR: wrong process type in process_edge()\n" ); exit ( 1 ); } }