// WARNING: the kmer atcg is different from soapdenovo's represent static void output_1edge ( preEDGE2 * long_edge, int K_size, FILE * fp ) { fprintf ( fp, ">length %d,", long_edge->full_edge->size() - K_size ); const char * seq = long_edge->full_edge->c_str(); //uint64_t from_kmer[2],to_kmer[2]; kmer_t2 from_kmer, to_kmer; get_kmer_from_seq ( seq, long_edge->full_edge->size() , K_size, 0, &from_kmer ); get_kmer_from_seq ( seq, long_edge->full_edge->size() , K_size, long_edge->full_edge->size() - K_size, &to_kmer ); uint64_t * from, *to; from = from_kmer.kmer; to = to_kmer.kmer; #ifdef _63MER_ fprintf ( fp, "%llx %llx,", from[0], from[1] ); fprintf ( fp, "%llx %llx,", to[0], to[1] ); #endif #ifdef _127MER_ fprintf ( fp, "%llx %llx %llx %llx,", from[0], from[1], from[2], from[3] ); fprintf ( fp, "%llx %llx %llx %llx,", to[0], to[1], to[2], to[3] ); #endif fprintf ( fp, "cvg %d,%d\n", long_edge->cvg, long_edge->bal_edge ); fprintf ( fp, "%s", seq ); fprintf ( fp, "\n" ); }
static void chop_kmers ( const char * read, int len, int K_size, kmer_t2 * kmer_array, int kmer_array_len, int & kmer_num ) { if ( len <= K_size ) { kmer_num = 0; return ; } kmer_num = len - K_size + 1; if ( kmer_num > kmer_array_len ) { fprintf ( stderr, "ERROR: the kmer_array_len is not enough! %d\n", kmer_num ); exit ( 1 ); } kmer_t2 kmer; for ( int i = 0; i < kmer_num; ++i ) //optimize later { get_kmer_from_seq ( read, len, K_size, i, &kmer ); kmer_array[i] = kmer; } }
/************************************************* Function: process_edge Description: It builds vetexes from one or part of one edge sequence. Input: 1. v_ht: hashtable 2. K_size: kmer size 3. seq: edge sequence 4. len: edge length 5. type: 1: process head and tail; 2: process head ; 3:process tail 6. edge_id: edge id 7. bal_edge: 0:palindrome 1:else Output: None. Return: None. *************************************************/ static void process_edge ( vertex_hash2 * v_ht, int K_size, char * seq, int len, int type, size_t edge_id, bool bal_edge ) { kmer_t2 vertex_kmer; kmer_t2 edge_kmer; vertex2 * v_tmp; edge_starter2 * e_tmp; int is_found; bool is_left; int edge_kmer_len; switch ( type ) { case 1: //process all .. //process the head get_kmer_from_seq ( seq, len, K_size, 0, &vertex_kmer ); if ( len <= K_size + gap ) //get the last kmer { get_kmer_from_seq ( seq, len, K_size, len - K_size, &edge_kmer ); edge_kmer_len = len - K_size; } else { //get_kmer_from_seq(seq, len, K_size, K_size,&edge_kmer); get_kmer_from_seq ( seq, len, K_size, gap, &edge_kmer ); edge_kmer_len = gap; } is_left = 0;//right v_tmp = put_vertex ( v_ht, vertex_kmer, is_found ); put_edge ( v_tmp, edge_kmer, is_left, edge_kmer_len, edge_id ); reverseCompKmer ( &vertex_kmer, K_size ); reverseCompKmer ( &edge_kmer, K_size ); is_left = 1;//left v_tmp = put_vertex ( v_ht, vertex_kmer, is_found ); put_edge ( v_tmp, edge_kmer, is_left, edge_kmer_len, edge_id + bal_edge ); //process the tail get_kmer_from_seq ( seq, len, K_size, len - K_size, &vertex_kmer ); if ( len <= K_size + gap ) //get the first kmer { get_kmer_from_seq ( seq, len, K_size, 0, &edge_kmer ); edge_kmer_len = len - K_size; } else { get_kmer_from_seq ( seq, len, K_size, len - K_size - gap, &edge_kmer ); edge_kmer_len = gap; } is_left = 1; v_tmp = put_vertex ( v_ht, vertex_kmer, is_found ); put_edge ( v_tmp, edge_kmer, is_left, edge_kmer_len, edge_id ); reverseCompKmer ( &vertex_kmer, K_size ); reverseCompKmer ( &edge_kmer, K_size ); is_left = 0;//right v_tmp = put_vertex ( v_ht, vertex_kmer, is_found ); put_edge ( v_tmp, edge_kmer, is_left, edge_kmer_len, edge_id + bal_edge ); break; case 2: //process only the head get_kmer_from_seq ( seq, len, K_size, 0, &vertex_kmer ); if ( len <= K_size + gap ) { get_kmer_from_seq ( seq, len, K_size, len - K_size, &edge_kmer ); edge_kmer_len = len - K_size; } else { get_kmer_from_seq ( seq, len, K_size, gap, &edge_kmer ); edge_kmer_len = gap; } is_left = 0;//right v_tmp = put_vertex ( v_ht, vertex_kmer, is_found ); put_edge ( v_tmp, edge_kmer, is_left, edge_kmer_len, edge_id ); reverseCompKmer ( &vertex_kmer, K_size ); reverseCompKmer ( &edge_kmer, K_size ); is_left = 1;//left v_tmp = put_vertex ( v_ht, vertex_kmer, is_found ); put_edge ( v_tmp, edge_kmer, is_left, edge_kmer_len, edge_id + bal_edge ); break; case 3: //process only the tail get_kmer_from_seq ( seq, len, K_size, len - K_size, &vertex_kmer ); if ( len <= K_size + gap ) { get_kmer_from_seq ( seq, len, K_size, 0, &edge_kmer ); edge_kmer_len = len - K_size; } else { get_kmer_from_seq ( seq, len, K_size, len - K_size - gap, &edge_kmer ); edge_kmer_len = gap; } is_left = 1; v_tmp = put_vertex ( v_ht, vertex_kmer, is_found ); put_edge ( v_tmp, edge_kmer, is_left, edge_kmer_len, edge_id ); reverseCompKmer ( &vertex_kmer, K_size ); reverseCompKmer ( &edge_kmer, K_size ); is_left = 0;//right v_tmp = put_vertex ( v_ht, vertex_kmer, is_found ); put_edge ( v_tmp, edge_kmer, is_left, edge_kmer_len, edge_id + bal_edge ); break; default: fprintf ( stderr, "ERROR: wrong process type in process_edge()\n" ); exit ( 1 ); } }