Beispiel #1
0
void sortedge()
{
	unsigned int index ;
	EDGE * sort_edge , * backup_edge ;
	sort_edge = ( EDGE * ) ckalloc ( sizeof ( EDGE ) * ( num_ed + 1 ) );
	backup_edge = ( EDGE * ) ckalloc ( sizeof ( EDGE ) * ( num_ed + 1 ) );
	unsigned int i = 1;

	for ( index = 1 ; index <= num_ed ; index ++ )
	{
		sort_edge[i].from_vt = edge_array[index].from_vt;
		sort_edge[i].seq = edge_array[index].seq;
		sort_edge[i].to_vt = index; // record old id
		sort_edge[i].length = edge_array[index].length;
		i++;
		copyOneEdge ( & ( backup_edge[index] ) , & ( edge_array[index] ) );

		if ( !EdSameAsTwin ( index ) )
		{
			index++;
			copyOneEdge ( & ( backup_edge[index] ) , & ( edge_array[index] ) );
		}
	}

	qsort ( & ( sort_edge[1] ), i - 1, sizeof ( sort_edge[1] ), cmp_seq );
	index_array = ( unsigned int * ) ckalloc ( sizeof ( unsigned int ) * ( num_ed + 1 ) ); // used to record new id
	unsigned int new_index = 1, old_index;

	for ( index = 1; index <= i - 1; index++ )
	{
		old_index = sort_edge[index].to_vt; // old id
		sort_edge[index].seq = NULL;
		index_array[old_index] = new_index++;// old id -> new id

		if ( !EdSameAsTwin ( old_index ) )
		{
			index_array[old_index + 1] = new_index++; // old id -> new id
		}
	}

	for ( index = 1; index <= num_ed; index++ )
	{
		new_index = index_array[index];
		copyOneEdge ( & ( edge_array[new_index] ), & ( backup_edge[index] ) );
		updateArcToEd ( new_index );
	}

	free ( index_array );
	free ( sort_edge );
	free ( backup_edge );
};
/*
 * Mark edge `edgeid` as deleted and, unless the edge is its own twin, do the
 * same for its twin edge.
 *
 * For each affected edge the coverage and length are zeroed, the `deleted`
 * flag is set, and the multiplicity of every outgoing arc and of that arc's
 * bal_arc is zeroed.  The arcs themselves are not unlinked here.
 */
static void delete1contig(unsigned int edgeid)
{
    unsigned int victim = edgeid;
    int pass;

    /* pass 0 handles the edge itself, pass 1 its twin (if distinct) */
    for (pass = 0; pass < 2; pass++)
    {
        ARC *cur;

        edge_array[victim].cvg = 0;
        edge_array[victim].deleted = 1;
        edge_array[victim].length = 0;

        for (cur = edge_array[victim].arcs; cur; cur = cur->next)
        {
            cur->multiplicity = 0;
            cur->bal_arc->multiplicity = 0;
        }

        if (pass == 1 || EdSameAsTwin(edgeid))
            return;

        victim = getTwinEdge(edgeid);
    }
}
/*
 * Delete every edge whose pool is shorter than `cutLength`.
 *
 * Edges are visited in the order given by cmp_cov on their coverage, and
 * extern_contig() is called for each one with the next free pool id; a
 * non-zero return value is recorded as the length of that pool and advances
 * the pool id.  (Presumably extern_contig() also records pool membership in
 * the global pool[] array - confirm against its definition.)  Afterwards each
 * edge/twin pair whose pool length is below `cutLength` is removed with
 * delete1contig(), the number of removals is printed, and removeArc() is
 * called.
 *
 * cutLength: pools with a recorded length strictly below this are deleted.
 */
void deleteShortContig(int cutLength)
{
    unsigned int index;
    int poolid_index = 1;       /* next pool id to hand out */
    int num_deleted = 0;
    int *poolid_length;
    COV_LIST *cov;

    if (pool == NULL)
        pool = (int *) ckalloc(sizeof(int) * (num_ed + 1));
    poolid_length = (int *) ckalloc(sizeof(int) * (num_ed + 1));
    for (index = 0; index <= num_ed; index++)
    {
        pool[index] = 0;
        poolid_length[index] = 0;
    }

    /* order the edges by coverage */
    cov = (COV_LIST *) ckalloc(sizeof(COV_LIST) * (num_ed + 1));
    for (index = 1; index <= num_ed; index++)
    {
        cov[index].contig = index;
        cov[index].cov = edge_array[index].cvg;
    }
    qsort(&cov[1], num_ed, sizeof(COV_LIST), cmp_cov);

    for (index = 1; index <= num_ed; index++)
    {
        poolid_length[poolid_index] = extern_contig(cov[index].contig, poolid_index);
        if (poolid_length[poolid_index] != 0)
            poolid_index++;
    }

    /* one visit per edge/twin pair: step over the twin when it is distinct */
    for (index = 1; index <= num_ed; index++)
    {
        if (poolid_length[pool[index]] < cutLength)
        {
            delete1contig(index);
            num_deleted++;
        }
        if (!EdSameAsTwin(index))
            index++;
    }

    free(poolid_length);
    free(pool);
    /* pool is a global guarded by "pool == NULL" on entry; leaving it dangling
       would make the next caller write into freed memory */
    pool = NULL;
    free(cov);
    printf("%d short contig(<%d) removed \n", num_deleted, cutLength);

    removeArc();
}
/*
 * Delete every edge/twin pair whose coverage is below `cutoff`.
 *
 * cutoff: coverage threshold, clamped to at most 30.  The progress message
 *         prints cutoff / 10.
 *
 * Each pair is examined once through its first member; removals go through
 * delete1contig() and removeArc() is called once at the end.
 */
void deleteWeakEdge(unsigned short cutoff)
{
    unsigned int edge_id = 1;
    int removed = 0;

    if (cutoff > 30)
        cutoff = 30;
    printf("Start to remove the low coverage edge < %d\n", cutoff / 10);

    while (edge_id <= num_ed)
    {
        if (edge_array[edge_id].cvg < cutoff)
        {
            delete1contig(edge_id);
            removed++;
        }

        /* step over the twin as well when it occupies its own slot */
        if (!EdSameAsTwin(edge_id))
            edge_id++;
        edge_id++;
    }

    printf("%d edges removed\n\n", removed);
    removeArc();
}
/*
 * Concatenate pairs of edges that are linearly linked.
 *
 * An edge `from_ed` and its successor `to_ed` are merged when from_ed has
 * exactly one outgoing arc, the twin of to_ed has exactly one outgoing arc,
 * to_ed is not its own twin, and from_ed is neither to_ed nor its twin.
 * Each merge is applied to the forward strand and then mirrored on the twin
 * strand.  Laps over the whole edge array are repeated until a lap performs
 * no merge; the number of non-deleted, non-self-twin edges with positive
 * length seen in the last lap is printed at the end.
 */
void linearConcatenate ()
{
	unsigned int from_ed, to_ed;
	unsigned int twin_to, twin_from;
	ARC *out_arc, *back_arc;
	int merged;		/* merges performed in the current lap */
	int live_edges;		/* edges with length > 0 seen in the current lap */

	do
	{
		merged = 0;
		live_edges = 0;

		for (from_ed = 1; from_ed <= num_ed; from_ed++)
		{
			if (edge_array[from_ed].deleted || EdSameAsTwin (from_ed))
			{
				continue;
			}

			if (edge_array[from_ed].length > 0)
			{
				live_edges++;
			}

			/* from_ed must have exactly one outgoing arc */
			out_arc = edge_array[from_ed].arcs;

			if (!out_arc || out_arc->next)
			{
				continue;
			}

			/* ... and the twin of its target exactly one as well */
			to_ed = out_arc->to_ed;
			twin_to = getTwinEdge (to_ed);
			back_arc = edge_array[twin_to].arcs;

			if (twin_to == to_ed || !back_arc || back_arc->next)
			{
				continue;
			}

			/* never merge an edge with itself or with its own twin */
			if (from_ed == to_ed || from_ed == twin_to)
			{
				continue;
			}

			merged++;
			/* forward strand first, then the mirrored operation on the twins */
			linearUpdateConnection (from_ed, to_ed, 0);
			allpathUpdateEdge (from_ed, to_ed, 0);
			twin_from = getTwinEdge (from_ed);
			linearUpdateConnection (twin_to, twin_from, 1);
			allpathUpdateEdge (twin_to, twin_from, 1);
		}

		printf ("a linear concatenation lap, %d concatenated\n", merged);
	}
	while (merged);

	printf ("%d edges in graph\n", live_edges);
}
Beispiel #6
0
/*
        -       -
          > - <
        -       -

 * Decide whether the repeat edge `edgeno` (the middle segment in the sketch
 * above) can be resolved.
 *
 * Returns 0 when it cannot, otherwise the number of left branches.  The edge
 * qualifies when
 *   - neither it nor any neighbour is its own twin or has multi == 255,
 *   - it has at least two left arcs and as many right arcs,
 *   - arcCounts() reports exactly one arc for the twin of every right branch
 *     and for the stored twin of every left branch,
 *   - cntByReads() pairs every left branch with exactly one right branch and
 *     vice versa.
 *
 * Side effects: fills the globals rights[], lefts[] and gothrough[][]; on an
 * early "return 0" they may be only partially written.
 */
unsigned int solvable ( unsigned int edgeno )
{
	unsigned int bal_ed;
	unsigned int n_left, n_right;
	unsigned int hits;
	unsigned int li, ri;
	unsigned int nbr, nbr_twin;
	ARC *left_arc, *right_arc;

	if ( EdSameAsTwin ( edgeno ) || edge_array[edgeno].multi == 255 )
	{
		return 0;
	}

	bal_ed = getTwinEdge ( edgeno );
	left_arc = arcCounts ( bal_ed, &n_left );

	if ( n_left < 2 )
	{
		return 0;
	}

	right_arc = arcCounts ( edgeno, &n_right );

	if ( n_left != n_right )
	{
		return 0;
	}

	/* Right side: collect the branches; arcs with to_ed == 0 are skipped. */
	n_right = 0;

	for ( ; right_arc; right_arc = right_arc->next )
	{
		nbr = right_arc->to_ed;

		if ( nbr == 0 )
		{
			continue;
		}

		if ( EdSameAsTwin ( nbr ) || edge_array[nbr].multi == 255 )
		{
			return 0;
		}

		rights[n_right++] = nbr;
		nbr_twin = getTwinEdge ( nbr );
		arcCounts ( nbr_twin, &hits );

		/* the branch must have a single upstream connection */
		if ( hits != 1 )
		{
			return 0;
		}
	}

	/* Left side: same walk, but the twin of each target is what gets stored. */
	n_left = 0;

	for ( ; left_arc; left_arc = left_arc->next )
	{
		nbr = left_arc->to_ed;

		if ( nbr == 0 )
		{
			continue;
		}

		if ( EdSameAsTwin ( nbr ) || edge_array[nbr].multi == 255 )
		{
			return 0;
		}

		nbr_twin = getTwinEdge ( nbr );
		lefts[n_left++] = nbr_twin;
		arcCounts ( nbr_twin, &hits );

		/* the branch must have a single downstream connection */
		if ( hits != 1 )
		{
			return 0;
		}
	}

	/* Every left branch must be linked by reads to exactly one right branch. */
	for ( li = 0; li < n_left; li++ )
	{
		hits = 0;

		for ( ri = 0; ri < n_right; ri++ )
		{
			gothrough[li][ri] = ( cntByReads ( lefts[li], edgeno, rights[ri] ) != 0 );
			hits += gothrough[li][ri];

			if ( hits > 1 )
			{
				return 0;
			}
		}

		if ( hits != 1 )
		{
			return 0;
		}
	}

	/* ... and every right branch to exactly one left branch. */
	for ( ri = 0; ri < n_right; ri++ )
	{
		hits = 0;

		for ( li = 0; li < n_left; li++ )
		{
			hits += gothrough[li][ri];
		}

		if ( hits != 1 )
		{
			return 0;
		}
	}

	return n_left;
}
Beispiel #7
0
void swapedge()
{
	unsigned int i;
	ARC * arc, *bal_arc, *temp_arc;
	int count_swap = 0, count_equal = 0;

	for ( i = 1; i <= num_ed; ++i )
	{
		if ( edge_array[i].deleted || EdSameAsTwin ( i ) )
			{ continue; }

		if ( EdSmallerThanTwin ( i ) )
		{
			if ( KmerLarger ( vt_array[edge_array[i].from_vt].kmer, vt_array[edge_array[i + 1].from_vt].kmer ) )
			{
				count_swap++;
				copyEdge ( i, num_ed + 1 + 1 );
				copyEdge ( i + 1, num_ed + 1 );
				copyEdge ( num_ed + 1, i );
				copyEdge ( num_ed + 1 + 1, i + 1 );
				edge_array[i].bal_edge = 2;
				edge_array[i + 1].bal_edge = 0;
				//take care of the arcs
				arc = edge_array[i].arcs;

				while ( arc )
				{
					arc->bal_arc->to_ed = i + 1;
					arc = arc->next;
				}

				arc = edge_array[i + 1].arcs;

				while ( arc )
				{
					arc->bal_arc->to_ed = i;
					arc = arc->next;
				}
			}
			else if ( KmerEqual ( vt_array[edge_array[i].from_vt].kmer, vt_array[edge_array[i + 1].from_vt].kmer ) )
			{
				int temp = EdgeEqual ( i, i + 1 );

				if ( temp == 0 )
				{
					count_equal++;
					edge_array[i].bal_edge = 1;
					delete1Edge ( i + 1 );
					//take care of the arcs
					arc = edge_array[i].arcs;

					while ( arc )
					{
						arc->bal_arc->to_ed = i;
						arc = arc->next;
					}

					bal_arc = edge_array[i + 1].arcs;
					edge_array[i + 1].arcs = NULL;

					while ( bal_arc )
					{
						temp_arc = bal_arc;
						bal_arc = bal_arc->next;

						if ( edge_array[i].arcs )
							{ edge_array[i].arcs->prev = temp_arc; }

						temp_arc->next = edge_array[i].arcs;
						edge_array[i].arcs = temp_arc;
					}
				}
				else if ( temp > 0 )
				{
					count_swap++;
					copyEdge ( i, num_ed + 1 + 1 );
					copyEdge ( i + 1, num_ed + 1 );
					copyEdge ( num_ed + 1, i );
					copyEdge ( num_ed + 1 + 1, i + 1 );
					edge_array[i].bal_edge = 2;
					edge_array[i + 1].bal_edge = 0;
					//take care of the arcs
					arc = edge_array[i].arcs;

					while ( arc )
					{
						arc->bal_arc->to_ed = i + 1;
						arc = arc->next;
					}

					arc = edge_array[i + 1].arcs;

					while ( arc )
					{
						arc->bal_arc->to_ed = i;
						arc = arc->next;
					}
				}
			}

			++i;
		}
		else
		{
			delete1Edge ( i );
			printf( "Warning : Front edge %d is larger than %d.\n", i, i + 1 );
		}
	}

	printf( "%d none-palindrome edge(s) swapped, %d palindrome edge(s) processed.\n", count_swap, count_equal );
};
/*
 * Delete edges whose coverage is very low compared with their neighbourhood.
 *
 * For every edge that is not its own twin, computeNextCov() is evaluated for
 * the edge and for its twin; the pair is skipped when either value is 0.
 * With hi/lo the larger/smaller of the two values, the edge is deleted when
 *   - lo / hi <  0.1  and  cvg / lo < 0.5,  or
 *   - lo / hi >= 0.1  and  cvg / hi < 0.05.
 * One "contig_cov" line is printed per evaluated edge.
 *
 * Returns 1 if the final removeArc() call reports a positive value,
 * otherwise 0.
 */
int deleteLightContig()
{
    double prev_cov, next_cov;
    double hi, lo, self_cov;
    unsigned int index;
    int change;
    int too_light;

    for (index = 1; index <= num_ed; index++)
    {
        if (EdSameAsTwin(index))
            continue;

        computeNextCov(index, &next_cov);
        computeNextCov(getTwinEdge(index), &prev_cov);
        if (next_cov == 0 || prev_cov == 0)
            continue;

        hi = (next_cov > prev_cov) ? next_cov : prev_cov;
        lo = (next_cov > prev_cov) ? prev_cov : next_cov;
        self_cov = (double)edge_array[index].cvg;
        printf("contig_cov:\t%0.1f\t%0.1f\t%0.1f\n", self_cov, hi, lo);

        /* strongly unbalanced neighbours are judged against the weaker side,
           balanced ones against the stronger side */
        if (lo / hi < 0.1)
            too_light = (self_cov / lo < 0.5);
        else
            too_light = (self_cov / hi < 0.05);

        if (too_light)
            delete1contig(index);

        /* skip the twin slot; together with the loop increment this moves on
           to the next pair */
        index++;
    }

    change = removeArc();
    return change > 0 ? 1 : 0;
}
/*
 * Destroy short, low-coverage inner edges, then clean up the graph.
 *
 * lenCutoff: only edges with 0 < length < lenCutoff are considered.
 * covCutoff: only edges with 0 < cvg <= covCutoff * 10 are considered.
 *
 * A candidate is destroyed only if arcCount() reports at least one arc both
 * for the edge and for its twin.  Deleted edges and edges that are their own
 * twin are never touched.  Afterwards removeDeadArcs(), linearConcatenate()
 * and compactEdgeArray() are run, in that order.
 */
void removeLowCovEdges (int lenCutoff, unsigned short covCutoff)
{
    unsigned int edge_id, twin_id;
    unsigned int n_out, n_in;
    int destroyed = 0;

    for (edge_id = 1; edge_id <= num_ed; edge_id++)
    {
        if (edge_array[edge_id].deleted)
            continue;
        if (edge_array[edge_id].cvg == 0)
            continue;
        if (edge_array[edge_id].cvg > covCutoff * 10)
            continue;
        if (edge_array[edge_id].length >= lenCutoff)
            continue;
        if (EdSameAsTwin (edge_id))
            continue;
        if (edge_array[edge_id].length == 0)
            continue;

        twin_id = getTwinEdge (edge_id);
        arcCount (edge_id, &n_out);
        arcCount (twin_id, &n_in);

        /* an edge with no arc on one side is not an inner edge */
        if (n_in == 0 || n_out == 0)
            continue;

        destroyEdge (edge_id);
        destroyed++;
    }

    printf ("Remove low coverage(%d): %d inner edges destroyed\n", covCutoff, destroyed);
    removeDeadArcs ();
    linearConcatenate ();
    compactEdgeArray ();
}
/*
 * Destroy short inner edges that are only weakly connected on both sides.
 *
 * lenCutoff:   only edges with 0 < length <= lenCutoff are considered.
 * multiCutoff: the arc returned by arcCount() on each side must have a
 *              multiplicity no greater than this.
 *
 * An edge is destroyed when, for the edge and for its twin alike, arcCount()
 * reports at most one arc, returns a non-NULL arc, and that arc's
 * multiplicity does not exceed multiCutoff.  Deleted edges and edges that
 * are their own twin are skipped.  removeDeadArcs() is called at the end.
 */
void removeWeakEdges (int lenCutoff, unsigned int multiCutoff)
{
    unsigned int edge_id, twin_id;
    unsigned int n_arcs;
    ARC *side_arc;
    int destroyed = 0;

    for (edge_id = 1; edge_id <= num_ed; edge_id++)
    {
        if (edge_array[edge_id].deleted || edge_array[edge_id].length == 0 || edge_array[edge_id].length > lenCutoff || EdSameAsTwin (edge_id))
        {
            continue;
        }

        twin_id = getTwinEdge (edge_id);

        /* right side: at most one arc, present, and weak */
        side_arc = arcCount (edge_id, &n_arcs);

        if (!(n_arcs <= 1 && side_arc && side_arc->multiplicity <= multiCutoff))
        {
            continue;
        }

        /* left side: the same test on the twin */
        side_arc = arcCount (twin_id, &n_arcs);

        if (!(n_arcs <= 1 && side_arc && side_arc->multiplicity <= multiCutoff))
        {
            continue;
        }

        destroyEdge (edge_id);
        destroyed++;
    }

    printf ("%d weak inner edges destroyed\n", destroyed);
    removeDeadArcs ();
}
/*
 * Replace the coverage of every edge by the length-weighted average coverage
 * of the pool it belongs to.
 *
 * Edges are visited in the order given by cmp_cov on their coverage, and
 * extern_contig() is called for each one with the next free pool id; a
 * non-zero return value advances the pool id.  (Presumably extern_contig()
 * records pool membership in the global pool[] array - confirm against its
 * definition.)  For every pool the sum of cvg * length and the sum of length
 * are accumulated over one member of each edge/twin pair; the quotient is
 * then written back to the cvg field of every edge, twins included.
 *
 * The global pool is allocated here if needed and released (and reset to
 * NULL) before returning.
 */
void resetCov()
{
    unsigned int index;
    int i;
    int poolid_index = 1;       /* next pool id to hand out */
    int *poolid_length;
    COV_LIST *cov;
    unsigned int *contig_cov;
    unsigned int *contig_length;

    if (pool == NULL)
        pool = (int *) ckalloc(sizeof(int) * (num_ed + 1));
    poolid_length = (int *) ckalloc(sizeof(int) * (num_ed + 1));
    for (index = 0; index <= num_ed; index++)
    {
        pool[index] = 0;
        poolid_length[index] = 0;
    }

    /* order the edges by coverage */
    cov = (COV_LIST *) ckalloc(sizeof(COV_LIST) * (num_ed + 1));
    for (index = 1; index <= num_ed; index++)
    {
        cov[index].contig = index;
        cov[index].cov = edge_array[index].cvg;
    }
    qsort(&cov[1], num_ed, sizeof(COV_LIST), cmp_cov);

    for (index = 1; index <= num_ed; index++)
    {
        poolid_length[poolid_index] = extern_contig(cov[index].contig, poolid_index);
        if (poolid_length[poolid_index] != 0)
            poolid_index++;
    }

    contig_cov = (unsigned int *) ckalloc(sizeof(unsigned int) * (poolid_index + 1));
    contig_length = (unsigned int *) ckalloc(sizeof(unsigned int) * (poolid_index + 1));
    /* Clear every allocated slot, including slot 0: pool[] entries start at 0,
       so slot 0 is accumulated into and read back for any edge that was not
       assigned a pool id. */
    for (i = 0; i <= poolid_index; i++)
    {
        contig_cov[i] = 0;
        contig_length[i] = 0;
    }

    /* accumulate over one member of each edge/twin pair */
    for (index = 1; index <= num_ed; index++)
    {
        contig_cov[pool[index]] += edge_array[index].cvg * edge_array[index].length;
        contig_length[pool[index]] += edge_array[index].length;
        if (!(EdSameAsTwin(index)))
            index++;
    }

    /* NOTE(review): slot 0 is not averaged here, only pool ids >= 1 are. */
    for (i = 1; i < poolid_index; i++)
    {
        if (contig_length[i] > 0)
            contig_cov[i] /= contig_length[i];
        else
            printf("pool length == 0\n");
    }

    for (index = 1; index <= num_ed; index++)
    {
        edge_array[index].cvg = contig_cov[pool[index]];
    }

    free(cov);
    free(pool);
    pool = NULL;
    free(poolid_length);
    free(contig_cov);
    free(contig_length);
}