Exemplo n.º 1
0
// WARNING: the kmer ATCG representation used here is different from SOAPdenovo's representation
static void output_1edge ( preEDGE2 * long_edge, int K_size, FILE * fp )
{
	fprintf ( fp, ">length %d,", long_edge->full_edge->size() - K_size );
	const char * seq = long_edge->full_edge->c_str();
	//uint64_t from_kmer[2],to_kmer[2];
	kmer_t2 from_kmer, to_kmer;
	get_kmer_from_seq ( seq, long_edge->full_edge->size() , K_size, 0, &from_kmer );
	get_kmer_from_seq ( seq, long_edge->full_edge->size() , K_size, long_edge->full_edge->size() - K_size, &to_kmer );
	uint64_t * from, *to;
	from = from_kmer.kmer;
	to = to_kmer.kmer;
#ifdef _63MER_
	fprintf ( fp, "%llx %llx,", from[0], from[1] );
	fprintf ( fp, "%llx %llx,", to[0], to[1] );
#endif
#ifdef _127MER_
	fprintf ( fp, "%llx %llx %llx %llx,", from[0], from[1], from[2], from[3] );
	fprintf ( fp, "%llx %llx %llx %llx,", to[0], to[1], to[2], to[3] );
#endif
	fprintf ( fp, "cvg %d,%d\n", long_edge->cvg, long_edge->bal_edge );
	fprintf ( fp, "%s", seq );
	fprintf ( fp, "\n" );
}
static void chop_kmers ( const char * read, int len, int K_size, kmer_t2 * kmer_array, int kmer_array_len, int & kmer_num )
{
	if ( len <= K_size )
	{
		kmer_num = 0;
		return ;
	}

	kmer_num = len - K_size + 1;

	if ( kmer_num > kmer_array_len )
	{
		fprintf ( stderr, "ERROR: the kmer_array_len is not enough! %d\n", kmer_num );
		exit ( 1 );
	}

	kmer_t2 kmer;

	for ( int i = 0; i < kmer_num; ++i )             //optimize later
	{
		get_kmer_from_seq ( read, len, K_size, i, &kmer );
		kmer_array[i] = kmer;
	}
}
/*************************************************
Function:
    process_edge
Description:
    It builds vetexes  from one or part of one edge sequence.
Input:
    1. v_ht:        hashtable
    2. K_size:      kmer size
    3. seq:     edge sequence
    4. len:     edge length
    5. type:        1: process head and tail;  2: process head ; 3:process tail
    6. edge_id:     edge id
    7. bal_edge:        0:palindrome 1:else
Output:
    None.
Return:
    None.
*************************************************/
static void process_edge ( vertex_hash2 * v_ht, int K_size, char * seq, int len, int type, size_t edge_id, bool bal_edge )
{
	kmer_t2 vertex_kmer;
	kmer_t2 edge_kmer;
	vertex2 * v_tmp;
	edge_starter2 * e_tmp;
	int is_found;
	bool is_left;
	int edge_kmer_len;

	switch ( type )
	{
		case 1: //process all ..
			//process the head
			get_kmer_from_seq ( seq, len, K_size, 0, &vertex_kmer );

			if ( len <= K_size + gap ) //get the last kmer
			{
				get_kmer_from_seq ( seq, len, K_size, len - K_size, &edge_kmer );
				edge_kmer_len = len - K_size;
			}
			else
			{
				//get_kmer_from_seq(seq, len, K_size, K_size,&edge_kmer);
				get_kmer_from_seq ( seq, len, K_size, gap, &edge_kmer );
				edge_kmer_len = gap;
			}

			is_left = 0;//right
			v_tmp = put_vertex ( v_ht, vertex_kmer, is_found );
			put_edge ( v_tmp, edge_kmer,  is_left, edge_kmer_len, edge_id );
			reverseCompKmer ( &vertex_kmer, K_size );
			reverseCompKmer ( &edge_kmer, K_size );
			is_left = 1;//left
			v_tmp = put_vertex ( v_ht, vertex_kmer, is_found );
			put_edge ( v_tmp, edge_kmer,  is_left, edge_kmer_len, edge_id + bal_edge );
			//process the tail
			get_kmer_from_seq ( seq, len, K_size, len - K_size, &vertex_kmer );

			if ( len <= K_size + gap ) //get the first kmer
			{
				get_kmer_from_seq ( seq, len, K_size, 0, &edge_kmer );
				edge_kmer_len = len - K_size;
			}
			else
			{
				get_kmer_from_seq ( seq, len, K_size, len - K_size - gap, &edge_kmer );
				edge_kmer_len = gap;
			}

			is_left = 1;
			v_tmp = put_vertex ( v_ht, vertex_kmer, is_found );
			put_edge ( v_tmp, edge_kmer,  is_left, edge_kmer_len, edge_id );
			reverseCompKmer ( &vertex_kmer, K_size );
			reverseCompKmer ( &edge_kmer, K_size );
			is_left = 0;//right
			v_tmp = put_vertex ( v_ht, vertex_kmer, is_found );
			put_edge ( v_tmp, edge_kmer,  is_left, edge_kmer_len, edge_id + bal_edge );
			break;
		case 2:
			//process only the  head
			get_kmer_from_seq ( seq, len, K_size, 0, &vertex_kmer );

			if ( len <= K_size + gap )
			{
				get_kmer_from_seq ( seq, len, K_size, len - K_size, &edge_kmer );
				edge_kmer_len = len - K_size;
			}
			else
			{
				get_kmer_from_seq ( seq, len, K_size, gap, &edge_kmer );
				edge_kmer_len = gap;
			}

			is_left = 0;//right
			v_tmp = put_vertex ( v_ht, vertex_kmer, is_found );
			put_edge ( v_tmp, edge_kmer,  is_left, edge_kmer_len, edge_id );
			reverseCompKmer ( &vertex_kmer, K_size );
			reverseCompKmer ( &edge_kmer, K_size );
			is_left = 1;//left
			v_tmp = put_vertex ( v_ht, vertex_kmer, is_found );
			put_edge ( v_tmp, edge_kmer,  is_left, edge_kmer_len, edge_id + bal_edge );
			break;
		case 3:
			//process only the tail
			get_kmer_from_seq ( seq, len, K_size, len - K_size, &vertex_kmer );

			if ( len <= K_size + gap )
			{
				get_kmer_from_seq ( seq, len, K_size, 0, &edge_kmer );
				edge_kmer_len = len - K_size;
			}
			else
			{
				get_kmer_from_seq ( seq, len, K_size, len - K_size - gap, &edge_kmer );
				edge_kmer_len = gap;
			}

			is_left = 1;
			v_tmp = put_vertex ( v_ht, vertex_kmer, is_found );
			put_edge ( v_tmp, edge_kmer,  is_left, edge_kmer_len, edge_id );
			reverseCompKmer ( &vertex_kmer, K_size );
			reverseCompKmer ( &edge_kmer, K_size );
			is_left = 0;//right
			v_tmp = put_vertex ( v_ht, vertex_kmer, is_found );
			put_edge ( v_tmp, edge_kmer,  is_left, edge_kmer_len, edge_id + bal_edge );
			break;
		default:
			fprintf ( stderr, "ERROR: wrong process type in process_edge()\n" );
			exit ( 1 );
	}
}