Ejemplo n.º 1
0
/*
 * Compute an outlier-filtered integer average of the int items in intStack.
 *
 * Pass 1 averages every item.  Pass 2 accumulates squared deviations from
 * that average and derives an integer standard deviation (SD).  Pass 3
 * re-averages only the items whose distance from the first average is
 * strictly below 3 * SD.  SD is printed to stdout as "SD=<n>, ".
 *
 * intStack: stack whose stackPop() yields pointers to int; its item_c field
 *           is read as the number of items.
 *
 * Returns 0 when fewer than 100 items are present, the unfiltered average
 * when SD is 0, otherwise the filtered average (0 if no item passes the
 * filter).
 *
 * Note: no stackRecover() follows the last popping loop on either of the
 * two late return paths, so the stack is returned as that loop left it.
 */
static int calcuIS (STACK * intStack)
{
	long long sum = 0;
	long long diff;
	int avg = 0;
	int *item;
	int num = intStack->item_c;

	/* Too few samples: report 0 rather than an unreliable estimate. */
	if (num < 100)
	{
		return avg;
	}

	/* Pass 1: plain average over every item. */
	stackBackup (intStack);

	while ((item = (int *) stackPop (intStack)) != NULL)
	{
		sum += *item;
	}

	stackRecover (intStack);
	num = intStack->item_c;
	avg = sum / num;
	sum = 0;
	stackBackup (intStack);

	/* Pass 2: sum of squared deviations from the average. */
	while ((item = (int *) stackPop (intStack)) != NULL)
	{
		/*
		 * Widen before squaring: the product used to be formed in int, which
		 * overflows (undefined behaviour) once a deviation exceeds ~46340.
		 */
		diff = (long long) *item - avg;
		sum += diff * diff;
	}

	/* Integer division first, then sqrt truncated to int. */
	int SD = sqrt (sum / (num - 1));

	if (SD == 0)
	{
		printf ("SD=%d, ", SD);
		return avg;
	}

	/* Pass 3: average only the items closer than 3 * SD to the first average. */
	stackRecover (intStack);
	sum = num = 0;

	while ((item = (int *) stackPop (intStack)) != NULL)
		if (abs (*item - avg) < 3 * SD)
		{
			sum += *item;
			num++;
		}

	if(num == 0) avg = 0;
	else avg = sum / num;
	printf ("SD=%d, ", SD);
	return avg;
}
Ejemplo n.º 2
0
/*
 * Collapse the chain of nodes held in nStack into one preEDGE record
 * (the file-level temp_edge) and write it out with output_1edge().
 *
 * bal_edge: stored in the edge record, used in the l_links/twin bookkeeping
 *           of every node touched here, and added to edge_c after output.
 * nStack:   stack of KMER_PT entries; it is popped and restored with
 *           stackRecover() several times in this function, which presumably
 *           relies on a backup taken by the caller -- TODO confirm.
 * count:    number of nodes in the chain; the edge length is count - 1.
 * fp:       passed through to output_1edge().
 *
 * Side effects visible here: updates edge_c and edgeCounter (and nodeCounter
 * for length-1 edges), modifies the nodes reachable through the stack
 * entries, and may insert a node into KmerSetsPatch.
 */
static void merge_linearV2 ( char bal_edge, STACK * nStack, int count, FILE * fp )
{
	int length, char_index;
	preEDGE * newedge;
	kmer_t * del_node, *longNode;
	char * tightSeq, firstCh;
	long long symbol = 0;
	int len_tSeq;
	Kmer wordplus, bal_wordplus;
	ubyte8 hash_ban;
	// the first two pops give the entries used as the end of the edge
	KMER_PT * last_np = ( KMER_PT * ) stackPop ( nStack );
	KMER_PT * second_last_np = ( KMER_PT * ) stackPop ( nStack );
	KMER_PT * first_np, *second_np = NULL;
	KMER_PT * temp;
	// NOTE(review): lastOne and single are initialised but never read below
	boolean found, lastOne = 1, single = 1;
	int setPicker;
	// edge length is one less than the number of nodes
	length = count - 1;
	len_tSeq = length;

	// sequences at or above edge_length_limit get their own buffer,
	// shorter ones reuse the shared edge_seq buffer
	if ( len_tSeq >= edge_length_limit )
		{ tightSeq = ( char * ) ckalloc ( len_tSeq * sizeof ( char ) ); }
	else
		{ tightSeq = edge_seq; }

	// tightSeq is filled from its last position backwards
	char_index = length - 1;
	newedge = &temp_edge;
	newedge->to_node = last_np->kmer;
	newedge->length = length;
	newedge->bal_edge = bal_edge;
	tightSeq[char_index--] = lastCharInKmer ( last_np->kmer );
	firstCh = firstCharInKmer ( second_last_np->kmer );
	dislink2prevUncertain ( last_np->node, firstCh, last_np->isSmaller );
	stackRecover ( nStack );

	// pop until a single entry remains; second_np keeps the entry popped
	// immediately before that remaining one
	while ( nStack->item_c > 1 )
	{
		second_np = ( KMER_PT * ) stackPop ( nStack );
	}

	first_np = ( KMER_PT * ) stackPop ( nStack );
	//unlink first node to the second one
	dislink2nextUncertain ( first_np->node, lastCharInKmer ( second_np->kmer ), first_np->isSmaller );
	//printf("from %llx, to %llx\n",first_np->node->seq,last_np->node->seq);
	// NOTE(review): the original comment here spoke of "temp" and "out_node" as the
	// last and second last nodes in line; no out_node exists in this function
	newedge->from_node = first_np->kmer;

	//create a long kmer for edge with length 1
	if ( length == 1 )
	{
		nodeCounter++;
		// from_node extended by the last character of to_node, plus its reverse complement
		wordplus = KmerPlus ( newedge->from_node, lastCharInKmer ( newedge->to_node ) );
		bal_wordplus = reverseComplement ( wordplus, overlaplen + 1 );
		/*
		Kmer temp = KmerPlus(reverseComplement(newedge->to_node,overlaplen),
		            lastCharInKmer(reverseComplement(newedge->from_node,overlaplen)));
		fprintf(stderr,"(%llx %llx) (%llx %llx) (%llx %llx)\n",
		        wordplus.high,wordplus.low,temp.high,temp.low,
		            bal_wordplus.high,bal_wordplus.low);
		*/
		edge_c++;
		edgeCounter++;

		// the smaller of the two orientations (per KmerSmaller) is the one inserted;
		// the target set is picked by hash modulo thrd_num
		if ( KmerSmaller ( wordplus, bal_wordplus ) )
		{
			hash_ban = hash_kmer ( wordplus );
			setPicker = hash_ban % thrd_num;
			found = put_kmerset ( KmerSetsPatch[setPicker], wordplus, 4, 4, &longNode );

			// an existing entry is only reported; longNode is updated either way
			if ( found )
				{ printf ( "longNode %llx %llx already exist\n", wordplus.high, wordplus.low ); }

			longNode->l_links = edge_c;
			longNode->twin = ( unsigned char ) ( bal_edge + 1 );
		}
		else
		{
			hash_ban = hash_kmer ( bal_wordplus );
			setPicker = hash_ban % thrd_num;
			found = put_kmerset ( KmerSetsPatch[setPicker], bal_wordplus, 4, 4, &longNode );

			if ( found )
				{ printf ( "longNode %llx %llx already exist\n", bal_wordplus.high, bal_wordplus.low ); }

			longNode->l_links = edge_c + bal_edge;
			longNode->twin = ( unsigned char ) ( -bal_edge + 1 );
		}
	}
	else
	{
		edge_c++;
		edgeCounter++;
	}

	stackRecover ( nStack );
	//mark all  the internal nodes
	// the first popped entry is skipped here (presumably the same entry as last_np,
	// whose character was already stored above -- TODO confirm)
	temp = ( KMER_PT * ) stackPop ( nStack );

	// visit every remaining entry except the final one left on the stack
	while ( nStack->item_c > 1 )
	{
		temp = ( KMER_PT * ) stackPop ( nStack );
		del_node = temp->node;
		del_node->inEdge = 1;
		// accumulated for the coverage value computed after the loop
		symbol += get_kmer_left_covs ( *del_node );

		// same l_links/twin assignment pattern as for longNode above, keyed on isSmaller
		if ( temp->isSmaller )
		{
			del_node->l_links = edge_c;
			del_node->twin = ( unsigned char ) ( bal_edge + 1 );
		}
		else
		{
			del_node->l_links = edge_c + bal_edge;
			del_node->twin = ( unsigned char ) ( -bal_edge + 1 );
		}

		tightSeq[char_index--] = lastCharInKmer ( temp->kmer );
	}

	newedge->seq = tightSeq;

	// coverage: symbol divided by (length - 1), times 10, capped at MaxEdgeCov;
	// length-1 edges have no internal nodes and get 0
	if ( length > 1 )
		{ newedge->cvg = symbol / ( length - 1 ) * 10 > MaxEdgeCov ? MaxEdgeCov : symbol / ( length - 1 ) * 10; }
	else
		{ newedge->cvg = 0; }

	output_1edge ( newedge, fp );

	// only the buffer obtained from ckalloc above is released; edge_seq is shared
	if ( len_tSeq >= edge_length_limit )
		{ free ( ( void * ) tightSeq ); }

	// advances edge_c by a further bal_edge on top of the increment made earlier
	edge_c += bal_edge;

	// progress message each time edge_c is a multiple of 10,000,000
	if ( edge_c % 10000000 == 0 )
		{ printf ( "--- %d edges built\n", edge_c ); }

	return;
}
Ejemplo n.º 3
0
//search linear structure starting with the root of a tree
static int startEdgeFromNode ( kmer_t * node1, FILE * fp )
{
	int node_c, palindrome;
	unsigned char flag;
	KMER_PT * ite_pt, *temp_pt;
	Kmer word1, bal_word1;
	char ch1;

	if ( node1->linear || node1->deleted )
		{ return 0; }

	// ignore floating loop
	word1 = node1->seq;
	bal_word1 = reverseComplement ( word1, overlaplen );

	// linear structure
	for ( ch1 = 0; ch1 < 4; ch1++ )     // for every node on outgoing list
	{
		flag = get_kmer_right_cov ( *node1, ch1 );

		if ( !flag )
			{ continue; }

		emptyStack ( nodeStack );
		temp_pt = ( KMER_PT * ) stackPush ( nodeStack );
		temp_pt->node = node1;
		temp_pt->isSmaller = 1;
		temp_pt->kmer = word1;
		stringBeads ( temp_pt, ch1, &node_c );

		//printf("%d nodes\n",node_c);
		if ( node_c < 2 )
			{ printf ( "%d nodes in this line!!!!!!!!!!!\n", node_c ); }
		else
		{
			//make a reverse complement node list
			stackBackup ( nodeStack );
			emptyStack ( bal_nodeStack );

			while ( ( ite_pt = ( KMER_PT * ) stackPop ( nodeStack ) ) != NULL )
			{
				temp_pt = ( KMER_PT * ) stackPush ( bal_nodeStack );
				temp_pt->kmer = reverseComplement ( ite_pt->kmer, overlaplen );
			}

			stackRecover ( nodeStack );
			palindrome = check_iden_kmerList ( nodeStack, bal_nodeStack );
			stackRecover ( nodeStack );

			if ( palindrome )
			{
				merge_linearV2 ( 0, nodeStack, node_c, fp );
			}
			else
				{ merge_linearV2 ( 1, nodeStack, node_c, fp ); }
		}
	} //every possible outgoing edges

	for ( ch1 = 0; ch1 < 4; ch1++ )     // for every node on incoming list
	{
		flag = get_kmer_left_cov ( *node1, ch1 );

		if ( !flag )
			{ continue; }

		emptyStack ( nodeStack );
		temp_pt = ( KMER_PT * ) stackPush ( nodeStack );
		temp_pt->node = node1;
		temp_pt->isSmaller = 0;
		temp_pt->kmer = bal_word1;
		stringBeads ( temp_pt, int_comp ( ch1 ), &node_c );

		if ( node_c < 2 )
			{ printf ( "%d nodes in this line!!!!!!!!!!!\n", node_c ); }
		else
		{
			//make a reverse complement node list
			stackBackup ( nodeStack );
			emptyStack ( bal_nodeStack );

			while ( ( ite_pt = ( KMER_PT * ) stackPop ( nodeStack ) ) != NULL )
			{
				temp_pt = ( KMER_PT * ) stackPush ( bal_nodeStack );
				temp_pt->kmer = reverseComplement ( ite_pt->kmer, overlaplen );
			}

			stackRecover ( nodeStack );
			palindrome = check_iden_kmerList ( nodeStack, bal_nodeStack );
			stackRecover ( nodeStack );

			if ( palindrome )
			{
				merge_linearV2 ( 0, nodeStack, node_c, fp );
				//printf("edge is palindrome with length %d\n",temp_edge.length);
			}
			else
				{ merge_linearV2 ( 1, nodeStack, node_c, fp ); }
		}
	} //every possible incoming edges

	return 0;
}