/*
 * Marks one contig edge as deleted.
 *
 * The edge's coverage and length are zeroed and its deleted flag is set;
 * every arc in its arc list, together with that arc's balancing arc, gets
 * multiplicity 0.  Unless the edge is its own twin, the same is then done
 * for the twin edge.
 */
static void delete1contig(unsigned int edgeid)
{
    ARC *cur;
    unsigned int twin;

    edge_array[edgeid].deleted=1;
    edge_array[edgeid].cvg=0;
    edge_array[edgeid].length=0;

    for(cur=edge_array[edgeid].arcs; cur; cur=cur->next)
    {
        cur->multiplicity=0;
        cur->bal_arc->multiplicity=0;
    }

    if(!EdSameAsTwin(edgeid))
    {
        twin=getTwinEdge(edgeid);

        edge_array[twin].deleted=1;
        edge_array[twin].cvg=0;
        edge_array[twin].length=0;

        for(cur=edge_array[twin].arcs; cur; cur=cur->next)
        {
            cur->multiplicity=0;
            cur->bal_arc->multiplicity=0;
        }
    }
}
예제 #2
0
/*
 * Re-routes the connections leftEd -> source -> rightEd through `target`.
 *
 * The four existing arcs (leftEd->source, source->rightEd and their
 * counterparts between the twin edges) are disabled by setting to_ed to 0.
 * Four new arcs carrying the same multiplicities are allocated:
 *   - leftEd -> target, pushed onto the front of leftEd's arc list;
 *   - twin(target) -> twin(leftEd), installed as the only arc of twin(target);
 *   - target -> rightEd, installed as the only arc of target;
 *   - twin(rightEd) -> twin(target), pushed onto the front of twin(rightEd)'s list.
 * Each new arc is paired with its counterpart through bal_arc.
 * The arc lists of target and twin(target) are overwritten, not appended to.
 */
static void moveArc2cp ( unsigned int leftEd, unsigned int rightEd,
                         unsigned int source, unsigned int target )
{
	ARC * oldArc;
	ARC * fwdArc, *revArc;
	ARC * listHead;
	unsigned int twinLeft = getTwinEdge ( leftEd );
	unsigned int twinRight = getTwinEdge ( rightEd );
	unsigned int twinSource = getTwinEdge ( source );
	unsigned int twinTarget = getTwinEdge ( target );

	/* ---- left side: leftEd -> target replaces leftEd -> source ---- */
	oldArc = getArcBetween ( leftEd, source );
	oldArc->to_ed = 0;
	fwdArc = allocateArc ( target );
	fwdArc->multiplicity = oldArc->multiplicity;
	fwdArc->prev = NULL;
	listHead = edge_array[leftEd].arcs;
	fwdArc->next = listHead;

	if ( listHead )
		{ listHead->prev = fwdArc; }

	edge_array[leftEd].arcs = fwdArc;

	oldArc = getArcBetween ( twinSource, twinLeft );
	oldArc->to_ed = 0;
	revArc = allocateArc ( twinLeft );
	revArc->multiplicity = oldArc->multiplicity;
	revArc->next = NULL;
	revArc->prev = NULL;
	edge_array[twinTarget].arcs = revArc;

	fwdArc->bal_arc = revArc;
	revArc->bal_arc = fwdArc;

	/* ---- right side: target -> rightEd replaces source -> rightEd ---- */
	oldArc = getArcBetween ( source, rightEd );
	oldArc->to_ed = 0;
	fwdArc = allocateArc ( rightEd );
	fwdArc->multiplicity = oldArc->multiplicity;
	fwdArc->next = NULL;
	fwdArc->prev = NULL;
	edge_array[target].arcs = fwdArc;

	oldArc = getArcBetween ( twinRight, twinSource );
	oldArc->to_ed = 0;
	revArc = allocateArc ( twinTarget );
	revArc->multiplicity = oldArc->multiplicity;
	revArc->prev = NULL;
	listHead = edge_array[twinRight].arcs;
	revArc->next = listHead;

	if ( listHead )
		{ listHead->prev = revArc; }

	edge_array[twinRight].arcs = revArc;

	fwdArc->bal_arc = revArc;
	revArc->bal_arc = fwdArc;
}
예제 #3
0
Idx VoronoiDiagram<CoordT>::addVertex(
	const PointT& pos, Idx edge1, Idx edge2, Idx edge3
) {
	// Register the position; the returned index identifies the new vertex.
	const Idx vertex = vertex_pos_.add(pos);
	const Idx incoming[3] = { edge1, edge2, edge3 };

	// All three half-edges end at the new vertex.
	for (int k = 0; k < 3; ++k) {
		edges_[incoming[k]].end_vertex = vertex;
	}

	// Chain each edge to the twin of its cyclic predecessor:
	// edge1 -> twin(edge3), edge2 -> twin(edge1), edge3 -> twin(edge2).
	for (int k = 0; k < 3; ++k) {
		consecutiveEdges(incoming[k], getTwinEdge(incoming[(k + 2) % 3]));
	}

	return vertex;
}
/*
 * Removes an edge and its twin from the graph.
 *
 * For an edge that is its own twin only the length is reset to 0; nothing
 * else is touched.  Otherwise the balancing arc of every arc leaving the
 * edge or its twin gets to_ed = 0, both arc lists are dropped, both lengths
 * are zeroed and both edges are flagged as deleted.
 */
void destroyEdge (unsigned int edgeid)
{
    ARC *cur;
    unsigned int twin = getTwinEdge (edgeid);

    if (twin == edgeid)
    {
        edge_array[edgeid].length = 0;
        return;
    }

    /* disable the counterpart of every arc on either strand */
    for (cur = edge_array[edgeid].arcs; cur; cur = cur->next)
    {
        cur->bal_arc->to_ed = 0;
    }

    for (cur = edge_array[twin].arcs; cur; cur = cur->next)
    {
        cur->bal_arc->to_ed = 0;
    }

    edge_array[edgeid].arcs = NULL;
    edge_array[edgeid].length = 0;
    edge_array[edgeid].deleted = 1;

    edge_array[twin].arcs = NULL;
    edge_array[twin].length = 0;
    edge_array[twin].deleted = 1;
}
/*
 * Zeroes the multiplicity of "light" arcs around every edge.
 *
 * For each edge (a distinct twin pair is visited once, through its first
 * member) the total multiplicity of the edge's own arcs and of its twin's
 * arcs is summed.  An arc of the edge with non-zero multiplicity is dropped
 * when its multiplicity is at most min_arc_rate times the twin-side total,
 * or at most min_arc_rate times cvg/10 of the edge; arcs of the twin are
 * tested the same way against the edge-side total.  Dropping an arc sets
 * the multiplicity of the arc and of its balancing arc to 0.
 *
 * min_arc_rate: fraction used for both thresholds.
 * Returns the number of arcs dropped by this call.
 *
 * Fix: the original condition was `m != 0 && A || B`, which C parses as
 * `(m != 0 && A) || B`.  With m == 0 the coverage clause B holds for any
 * non-negative threshold, so arcs that were already at 0 were counted
 * again on every call.  The non-zero test now guards both clauses.
 */
static unsigned int deleteLightFlowArc(double min_arc_rate)
{
    unsigned int index,twin,count=0;
    unsigned int total_in_weight,total_out_weight,coverage;
    ARC *arc;

    for(index=1; index<=num_ed; index++)
    {
        total_in_weight=0;
        total_out_weight=0;
        twin = getTwinEdge(index);
        /* stored in an unsigned int, so the quotient is truncated */
        coverage = (double)edge_array[index].cvg/10;

        for(arc=edge_array[index].arcs; arc; arc=arc->next)
            total_out_weight += arc->multiplicity;

        for(arc=edge_array[twin].arcs; arc; arc=arc->next)
            total_in_weight += arc->multiplicity;

        /* arcs of the edge, judged against the twin-side total */
        for(arc=edge_array[index].arcs; arc; arc=arc->next)
        {
            if(arc->multiplicity != 0
                    && (arc->multiplicity <= (double)total_in_weight*min_arc_rate
                        || arc->multiplicity <= (double)coverage*min_arc_rate))
            {
                arc->bal_arc->multiplicity=0;
                arc->multiplicity=0;
                count++;
            }
        }

        /* arcs of the twin, judged against the edge-side total */
        for(arc=edge_array[twin].arcs; arc; arc=arc->next)
        {
            if(arc->multiplicity != 0
                    && (arc->multiplicity <= (double)total_out_weight*min_arc_rate
                        || arc->multiplicity <= (double)coverage*min_arc_rate))
            {
                arc->bal_arc->multiplicity=0;
                arc->multiplicity=0;
                count++;
            }
        }

        /* a distinct twin was handled together with this edge: skip it */
        if(twin != index)
            index++;
    }
    return count;
}
예제 #6
0
/*************************************************
Function:
    compactEdgeArray
Description:
    Slides every non-deleted edge down to the lowest free
    slot of edge_array (slots are numbered from 1) using
    edgeMove, then shrinks num_ed to the number of edges
    kept. When a moved edge has a distinct twin, the pair
    occupies two slots and the following index is skipped.
    The edge count is reported on stderr before and after.
Input:
    None.
Output:
    None.
Return:
    None.
*************************************************/
void compactEdgeArray ()
{
	unsigned int src;
	unsigned int kept = 0;
	unsigned int twin;
	fprintf ( stderr, "Before compacting, %d edge(s) existed.\n", num_ed );

	for ( src = 1; src <= num_ed; src++ )
	{
		if ( !edge_array[src].deleted )
		{
			kept++;

			/* an edge already sitting in its final slot needs no move */
			if ( src != kept )
			{
				twin = getTwinEdge ( src );
				edgeMove ( src, kept );

				if ( twin != src )
				{
					src++;
					kept++;
				}
			}
		}
	}

	num_ed = kept;
	fprintf ( stderr, "After compacting, %d edge(s) left.\n", num_ed );
}
/*
 * Destroys short, low-coverage edges that are connected on both sides.
 *
 * An edge is a candidate when it is not deleted, is not its own twin, has
 * a non-zero cvg no greater than covCutoff * 10 and a non-zero length
 * below lenCutoff.  A candidate is destroyed when arcCount reports at
 * least one arc for the edge and at least one for its twin.  Afterwards
 * the number destroyed is printed and removeDeadArcs, linearConcatenate
 * and compactEdgeArray are run, in that order.
 */
void removeLowCovEdges (int lenCutoff, unsigned short covCutoff)
{
    unsigned int e;
    unsigned int twin;
    unsigned int nOut, nIn;
    int destroyed = 0;

    for (e = 1; e <= num_ed; e++)
    {
        if (edge_array[e].deleted || edge_array[e].cvg == 0)
        {
            continue;
        }

        if (edge_array[e].cvg > covCutoff * 10 || edge_array[e].length >= lenCutoff)
        {
            continue;
        }

        if (EdSameAsTwin (e) || edge_array[e].length == 0)
        {
            continue;
        }

        twin = getTwinEdge (e);
        arcCount (e, &nOut);
        arcCount (twin, &nIn);

        /* only inner edges (arcs on both sides) are removed */
        if (nIn >= 1 && nOut >= 1)
        {
            destroyEdge (e);
            destroyed++;
        }
    }

    printf ("Remove low coverage(%d): %d inner edges destroyed\n", covCutoff, destroyed);
    removeDeadArcs ();
    linearConcatenate ();
    compactEdgeArray ();
}
예제 #8
0
/*
 * Packs the surviving edges into the front of edge_array (slots start at 1)
 * and updates num_ed.  Deleted edges are skipped; every other edge that is
 * not already in its final slot is relocated with edgeMove.  When the moved
 * edge has a distinct twin, two slots are consumed and the next index is
 * skipped.  The edge count is printed before and after.
 */
void compactEdgeArray()
{
	unsigned int from = 1;
	unsigned int packed = 0;
	unsigned int twin;
	printf ( "there're %d edges\n", num_ed );

	while ( from <= num_ed )
	{
		if ( !edge_array[from].deleted )
		{
			packed++;

			if ( from != packed )
			{
				twin = getTwinEdge ( from );
				edgeMove ( from, packed );

				if ( twin != from )
				{
					from++;
					packed++;
				}
			}
		}

		from++;
	}

	num_ed = packed;
	printf ( "after compacting %d edges left\n", num_ed );
}
예제 #9
0
/*
 * Gathers edgeno plus the first repTimes entries of lefts[] and of rights[]
 * into involved[] and tells whether those edges interfere with one another.
 *
 * Returns 1 when some edge occurs twice in the gathered set, or when the
 * twin of a gathered edge is itself in the set (so an edge that is its own
 * twin always yields 1); returns 0 otherwise.
 * involved[], lefts[] and rights[] are arrays defined outside this function.
 */
static boolean interferingCheck ( unsigned int edgeno, int repTimes )
{
	int total = 0;
	int a, b;
	unsigned int twin;

	involved[total++] = edgeno;

	for ( a = 0; a < repTimes; a++ )
		{ involved[total++] = lefts[a]; }

	for ( a = 0; a < repTimes; a++ )
		{ involved[total++] = rights[a]; }

	/* is any edge listed twice? */
	for ( a = 0; a + 1 < total; a++ )
	{
		for ( b = a + 1; b < total; b++ )
		{
			if ( involved[a] == involved[b] )
				{ return 1; }
		}
	}

	/* is the twin of any listed edge listed as well? */
	for ( a = 0; a < total; a++ )
	{
		twin = getTwinEdge ( involved[a] );

		for ( b = 0; b < total; b++ )
		{
			if ( involved[b] == twin )
				{ return 1; }
		}
	}

	return 0;
}
예제 #10
0
/*
 * Dumps the edge graph in Graphviz "digraph" form to "<outfile>.edge.gvz".
 *
 * Every non-deleted edge, from index num_ed down to 1, becomes one line
 * "V<from_vt> -> V<to_vt>" labelled "<index>(<length>)".
 *
 * outfile: prefix of the output file name.
 *
 * Fix: the file name was built with an unbounded sprintf into a 256-byte
 * buffer.  It is now built with snprintf; if the name does not fit, an
 * error is reported on stderr and nothing is written.  The unused twin
 * lookup (bal_i) and the commented-out arc-count filters were removed.
 */
void output_graph ( char * outfile )
{
	char name[256];
	FILE * fp;
	unsigned int i;
	int nameLen;

	nameLen = snprintf ( name, sizeof ( name ), "%s.edge.gvz", outfile );

	if ( nameLen < 0 || ( size_t ) nameLen >= sizeof ( name ) )
	{
		fprintf ( stderr, "output_graph: output file name is too long, graph not written\n" );
		return;
	}

	fp = ckopen ( name, "w" );
	fprintf ( fp, "digraph G{\n" );
	fprintf ( fp, "\tsize=\"512,512\";\n" );

	for ( i = num_ed; i > 0; i-- )
	{
		if ( edge_array[i].deleted )
			{ continue; }

		fprintf ( fp, "\tV%d -> V%d[label =\"%d(%d)\"];\n", edge_array[i].from_vt, edge_array[i].to_vt, i, edge_array[i].length );
	}

	fprintf ( fp, "}\n" );
	fclose ( fp );
}
예제 #11
0
//A path from e1 to e2 is merged into e1 (indicate == 0) or into e2
//(indicate != 0); this updates the graph topology accordingly.
//
//indicate == 0: e1 takes over e2's to_vt and arc list, the balancing arc of
//each of those arcs is pointed at twin(e1), and e2 is marked deleted.
//indicate != 0: the balancing arc of every arc leaving twin(e1) is pointed
//at e2, e1's arc list is dropped, e2 takes over e1's from_vt, and e1 is
//marked deleted.
//In both cases the surviving edge's cvg becomes the length-weighted mean of
//the two cvg values, unless both lengths are 0.
void linearUpdateConnection (unsigned int e1, unsigned int e2, int indicate)
{
	ARC *cur;
	unsigned int e1Twin;

	if (indicate)
	{
		//everything that pointed to e1 now points to e2
		for (cur = edge_array[getTwinEdge (e1)].arcs; cur; cur = cur->next)
		{
			cur->bal_arc->to_ed = e2;
		}

		edge_array[e1].arcs = NULL;
		edge_array[e2].from_vt = edge_array[e1].from_vt;

		if (edge_array[e1].length || edge_array[e2].length)
			edge_array[e2].cvg = (edge_array[e1].cvg * edge_array[e1].length + edge_array[e2].cvg * edge_array[e2].length) / (edge_array[e1].length + edge_array[e2].length);

		edge_array[e1].deleted = 1;
		return;
	}

	edge_array[e1].to_vt = edge_array[e2].to_vt;
	e1Twin = getTwinEdge (e1);

	for (cur = edge_array[e2].arcs; cur; cur = cur->next)
	{
		cur->bal_arc->to_ed = e1Twin;
	}

	//e1 inherits e2's outgoing arcs
	edge_array[e1].arcs = edge_array[e2].arcs;
	edge_array[e2].arcs = NULL;

	if (edge_array[e1].length || edge_array[e2].length)
		edge_array[e1].cvg = (edge_array[e1].cvg * edge_array[e1].length + edge_array[e2].cvg * edge_array[e2].length) / (edge_array[e1].length + edge_array[e2].length);

	edge_array[e2].deleted = 1;
}
예제 #12
0
/*************************************************
Function:
    add1marker2edge
Description:
    Appends a read id to the marker list of an edge and
    the negated id to the marker list of its twin, bumping
    the multi counter of each. Nothing is recorded when the
    edge's multi counter equals 255.
Input:
    1. edgeno:      the edge index
    2. readid:      the read id
Output:
    None.
Return:
    None.
*************************************************/
static void add1marker2edge ( unsigned int edgeno, long long readid )
{
  unsigned int twin;
  unsigned char slot;

  if ( edge_array[edgeno].multi != 255 )
    {
      twin = getTwinEdge ( edgeno );

      /* the old counter value is the next free slot */
      slot = edge_array[edgeno].multi++;
      edge_array[edgeno].markers[slot] = readid;

      slot = edge_array[twin].multi++;
      edge_array[twin].markers[slot] = -readid;
    }
}
예제 #13
0
/*
 * Clones edge `source` and its twin into the slots `target` and `target + 1`.
 *
 * When the twin of source has the larger index, the copy of source goes to
 * `target` and the copy of the twin to `target + 1`; otherwise the two are
 * swapped.  For each copy a fresh buffer of length / 4 + 1 bytes is
 * allocated and filled from the original's seq; length, cvg, from_vt, to_vt
 * and bal_edge are copied; rv, arcs and markers are set to NULL; flag and
 * deleted are set to 0.
 *
 * Returns the index at which the copy of `source` was stored.
 */
static unsigned int cp1edge ( unsigned int source, unsigned int target )
{
	int length = edge_array[source].length;
	int nbytes = length / 4 + 1;
	int k;
	char * seqCopy;
	unsigned int bal_source = getTwinEdge ( source );
	unsigned int bal_target;

	if ( bal_source <= source )
	{
		bal_target = target;
		target = target + 1;
	}
	else
		{ bal_target = target + 1; }

	/* copy of the source edge */
	seqCopy = ( char * ) ckalloc ( nbytes * sizeof ( char ) );

	for ( k = 0; k < nbytes; k++ )
		{ seqCopy[k] = edge_array[source].seq[k]; }

	edge_array[target].seq = seqCopy;
	edge_array[target].length = length;
	edge_array[target].cvg = edge_array[source].cvg;
	edge_array[target].from_vt = edge_array[source].from_vt;
	edge_array[target].to_vt = edge_array[source].to_vt;
	edge_array[target].bal_edge = edge_array[source].bal_edge;
	edge_array[target].arcs = NULL;
	edge_array[target].rv = NULL;
	edge_array[target].markers = NULL;
	edge_array[target].deleted = 0;
	edge_array[target].flag = 0;

	/* copy of the twin edge */
	seqCopy = ( char * ) ckalloc ( nbytes * sizeof ( char ) );

	for ( k = 0; k < nbytes; k++ )
		{ seqCopy[k] = edge_array[bal_source].seq[k]; }

	edge_array[bal_target].seq = seqCopy;
	edge_array[bal_target].length = length;
	edge_array[bal_target].cvg = edge_array[bal_source].cvg;
	edge_array[bal_target].from_vt = edge_array[bal_source].from_vt;
	edge_array[bal_target].to_vt = edge_array[bal_source].to_vt;
	edge_array[bal_target].bal_edge = edge_array[bal_source].bal_edge;
	edge_array[bal_target].arcs = NULL;
	edge_array[bal_target].rv = NULL;
	edge_array[bal_target].markers = NULL;
	edge_array[bal_target].deleted = 0;
	edge_array[bal_target].flag = 0;

	return target;
}
예제 #14
0
//Relocate the edge stored at `source` to slot `target`.
//A distinct twin is copied to `target + 1`, and the pair gets bal_edge
//values 2 and 0; an edge that is its own twin gets bal_edge 1.
//Afterwards the balancing arc of every arc leaving the moved edge is
//pointed at the twin's new slot, and, for a distinct twin, the balancing arc
//of every arc leaving the twin is pointed at `target`.
void edgeMove (unsigned int source, unsigned int target)
{
	unsigned int srcTwin, dstTwin;
	ARC *cur;

	copyEdge (source, target);
	srcTwin = getTwinEdge (source);

	if (srcTwin == source)
	{
		dstTwin = target;
		edge_array[target].bal_edge = 1;
	}
	else
	{
		dstTwin = target + 1;
		copyEdge (srcTwin, dstTwin);
		edge_array[target].bal_edge = 2;
		edge_array[dstTwin].bal_edge = 0;
	}

	//re-aim the counterparts of the moved edge's arcs
	for (cur = edge_array[target].arcs; cur; cur = cur->next)
	{
		cur->bal_arc->to_ed = dstTwin;
	}

	if (dstTwin != target)
	{
		for (cur = edge_array[dstTwin].arcs; cur; cur = cur->next)
		{
			cur->bal_arc->to_ed = target;
		}
	}
}
/*
 * Caps arcs whose multiplicity is far above the flow around them.
 *
 * For every edge i, in_weight is the summed multiplicity of the arcs of
 * twin(i).  For each arc of i, out_weight is the summed multiplicity of the
 * arcs of the arc's target edge.  When in_weight and the arc's multiplicity
 * are both non-zero and the multiplicity exceeds both in_weight * multi and
 * out_weight * multi, the arc's multiplicity is replaced by the larger of
 * in_weight and out_weight.  The balancing arc is not modified.
 */
void delowHighArc(int multi)
{
    unsigned int i, twin, to_edge, count = 0;
    ARC *arc, *arc_temp;
    unsigned int in_weight, out_weight, curr_weight;

    for (i = 1; i <= num_ed; i++)
    {
        /* in-flow weight of edge i: total over the arcs of its twin */
        in_weight = 0;
        twin = getTwinEdge(i);
        for (arc = edge_array[twin].arcs; arc; arc = arc->next)
        {
            in_weight += arc->multiplicity;
        }

        curr_weight = 0;
        for (arc = edge_array[i].arcs; arc; arc = arc->next)
        {
            curr_weight = arc->multiplicity;
            to_edge = arc->to_ed;

            out_weight = 0;
            for (arc_temp = edge_array[to_edge].arcs; arc_temp; arc_temp = arc_temp->next)
            {
                out_weight += arc_temp->multiplicity;
            }

            if (in_weight == 0 || curr_weight == 0)
            {
                continue;
            }
            if (curr_weight <= in_weight*multi || curr_weight <= out_weight*multi)
            {
                continue;
            }

            count++;
            if (in_weight > out_weight)
            {
                arc->multiplicity = in_weight;
            }
            else
            {
                arc->multiplicity = out_weight;
            }
        }
    }
}
/*
 * Destroys short edges that hang on weak arcs at both ends.
 *
 * An edge is examined when it is not deleted, is not its own twin and has a
 * non-zero length of at most lenCutoff.  It is destroyed when arcCount
 * reports at most one arc and returns a non-NULL arc whose multiplicity is
 * at most multiCutoff, both for the edge and for its twin.  The number of
 * edges destroyed is printed and removeDeadArcs is run; no concatenation
 * or compaction is performed here.
 */
void removeWeakEdges (int lenCutoff, unsigned int multiCutoff)
{
    unsigned int e;
    unsigned int twin;
    unsigned int nOut, nIn;
    ARC *outArc, *inArc;
    int destroyed = 0;

    for (e = 1; e <= num_ed; e++)
    {
        if (edge_array[e].deleted || edge_array[e].length == 0)
        {
            continue;
        }

        if (edge_array[e].length > lenCutoff || EdSameAsTwin (e))
        {
            continue;
        }

        twin = getTwinEdge (e);

        /* the edge's own arcs */
        outArc = arcCount (e, &nOut);
        if (nOut > 1 || !outArc || outArc->multiplicity > multiCutoff)
        {
            continue;
        }

        /* the twin's arcs */
        inArc = arcCount (twin, &nIn);
        if (nIn > 1 || !inArc || inArc->multiplicity > multiCutoff)
        {
            continue;
        }

        destroyEdge (e);
        destroyed++;
    }

    printf ("%d weak inner edges destroyed\n", destroyed);
    removeDeadArcs ();
}
/*
 * Greedily extends a contig from a seed edge in both directions and tags
 * every edge collected with pool_index.
 *
 * If pool[edgeid] is already non-zero nothing is done and 0 is returned.
 * Otherwise the seed and its twin are tagged, and two walks are made: one
 * starting at the seed, one starting at the seed's twin.  At each step the
 * arc of the current edge with the largest multiplicity (> 0) among those
 * leading to an untagged edge is followed; the edge reached and its twin
 * are tagged and the edge's length is added.  A walk stops when no such
 * arc exists.
 *
 * edgeid:     seed edge.
 * pool_index: value written to pool[] for every edge collected.
 * Returns the summed length of the seed and of all edges reached.
 *
 * Fix: both walks used to read edge_array[edgeid].arcs on every step
 * instead of the arcs of the current edge, so the walk never left the seed
 * and the twin-side walk did nothing.  The arcs of curr_edge are used now.
 */
static int extern_contig(unsigned int edgeid,int pool_index)
{
    int length;
    int pass;
    int max_arc;
    unsigned int best_id;
    unsigned int curr_edge;
    ARC *arc;

    if(pool[edgeid]!=0)
        return 0;

    pool[edgeid]=pool_index;
    pool[getTwinEdge(edgeid)]=pool_index;
    length = edge_array[edgeid].length;

    /* pass 0 walks from the seed, pass 1 from its twin */
    for(pass=0; pass<2; pass++)
    {
        curr_edge = (pass==0) ? edgeid : getTwinEdge(edgeid);

        while(curr_edge)
        {
            max_arc=0;
            best_id=0;

            for(arc=edge_array[curr_edge].arcs; arc; arc=arc->next)
            {
                if(pool[arc->to_ed]==0 && arc->multiplicity > max_arc)
                {
                    max_arc=arc->multiplicity;
                    best_id=arc->to_ed;
                }
            }

            if(max_arc>0)
            {
                pool[best_id]=pool_index;
                pool[getTwinEdge(best_id)]=pool_index;
                length += edge_array[best_id].length;
                curr_edge=best_id;
            }
            else
                curr_edge=0;
        }
    }

    return length;
}
예제 #18
0
/*
 * Writes "<outfile>.contig" and "<outfile>.ContigIndex" and prints contig
 * length statistics.
 *
 * Steps, in order:
 *   1. ed_array[1..ed_num] is sorted in place with cmp_edge.
 *   2. Lengths (edge length + overlaplen) that reach len_bar are collected;
 *      their count, sum, mean, maximum, N50 and N90 are printed.
 *   3. Every edge that is not deleted and has length >= 1 is passed to
 *      output_1contig, flagged as a tip unless both the edge and its twin
 *      have arcs.  When EdSmallerThanTwin holds the next index is skipped.
 *   4. The index file gets one line per edge: index, length + overlaplen
 *      and 1 / -1 / 0 according to EdSmallerThanTwin / EdLargerThanTwin.
 *      This last loop reads the globals num_ed and edge_array rather than
 *      the ed_num / ed_array parameters.
 *
 * ed_array: edge array, indexed from 1.
 * ed_num:   number of edges in ed_array.
 * outfile:  prefix of both output file names.
 * cut_len:  only echoed in the final summary line.
 *
 * Fix: "the longest is ..." read length_array[len_c - 1] even when no
 * contig reached len_bar (len_c == 0), an out-of-bounds read.  It is now
 * printed only when len_c > 0.
 *
 * NOTE(review): the file names are built with sprintf into a 256-byte
 * buffer; a very long outfile prefix would overflow it.
 */
void output_contig ( EDGE * ed_array, unsigned int ed_num, char * outfile, int cut_len )
{
	char    temp[256];
	FILE * fp, *fp_contig;
	int flag, count, len_c;
	int signI;
	unsigned int i;
	long long sum = 0, N90, N50;
	unsigned int * length_array;
	boolean tip;
	sprintf ( temp, "%s.contig", outfile );
	fp = ckopen ( temp, "w" );
	qsort ( &ed_array[1], ed_num, sizeof ( EDGE ), cmp_edge );
	length_array = ( unsigned int * ) ckalloc ( ed_num * sizeof ( unsigned int ) );
	kmerSeq = ( char * ) ckalloc ( overlaplen * sizeof ( char ) );
	//first scan for number counting
	count = len_c = 0;

	for ( i = 1; i <= ed_num; i++ )
	{
		if ( ( ed_array[i].length + overlaplen ) >= len_bar )
			{ length_array[len_c++] = ed_array[i].length + overlaplen; }

		if ( ed_array[i].length < 1 || ed_array[i].deleted )
			{ continue; }

		count++;

		if ( EdSmallerThanTwin ( i ) )
			{ i++; }
	}

	sum = 0;

	for ( signI = len_c - 1; signI >= 0; signI-- )
		{ sum += length_array[signI]; }

	qsort ( length_array, len_c, sizeof ( length_array[0] ), cmp_int );

	//the statistics below are meaningful only if at least one length was collected
	if ( len_c > 0 )
	{
		printf ( "%d ctgs longer than %d, sum up %lldbp, with average length %lld\n", len_c, len_bar, sum, sum / len_c );
		printf ( "the longest is %dbp, ", length_array[len_c - 1] );
	}

	N50 = sum * 0.5;
	N90 = sum * 0.9;
	sum = flag = 0;

	//accumulate from the end of the sorted array until the N50 / N90 thresholds are reached
	for ( signI = len_c - 1; signI >= 0; signI-- )
	{
		sum += length_array[signI];

		if ( !flag && sum >= N50 )
		{
			printf ( "contig N50 is %d bp,", length_array[signI] );
			flag = 1;
		}

		if ( sum >= N90 )
		{
			printf ( "contig N90 is %d bp\n", length_array[signI] );
			break;
		}
	}

	for ( i = 1; i <= ed_num; i++ )
	{
		if ( ed_array[i].deleted || ed_array[i].length < 1 )
			{ continue; }

		//an edge counts as a tip unless it has arcs on both strands
		if ( ed_array[i].arcs && ed_array[getTwinEdge ( i )].arcs )
			{ tip = 0; }
		else
			{ tip = 1; }

		output_1contig ( i, & ( ed_array[i] ), fp, tip );

		if ( EdSmallerThanTwin ( i ) )
			{ i++; }
	}

	fclose ( fp );
	free ( ( void * ) kmerSeq );
	free ( ( void * ) length_array );
	printf ( "%d contigs longer than %d output\n", count, cut_len );
	sprintf ( temp, "%s.ContigIndex", outfile );
	fp_contig = ckopen ( temp, "w" );
	fprintf ( fp_contig, "Edge_num %d %d\n", ed_num, count );
	fprintf ( fp_contig, "index\tlength\treverseComplement\n" );

	for ( i = 1; i <= num_ed; i++ )
	{
		fprintf ( fp_contig, "%d\t%d\t", i, edge_array[i].length + overlaplen );

		if ( EdSmallerThanTwin ( i ) )
		{
			fprintf ( fp_contig, "1\n" );
			i++;
		}
		else if ( EdLargerThanTwin ( i ) )
			{ fprintf ( fp_contig, "-1\n" ); }
		else
			{ fprintf ( fp_contig, "0\n" ); }
	}

	fclose ( fp_contig );
}
예제 #19
0
/*************************************************
Function:
    loadPath
Description:
    1. Reads the per-edge marker counts from
       "<graphfile>.markOnEdge" and reserves a slice of
       markersArray for every edge whose count is below 255.
    2. Reads "<graphfile>.path" line by line; each line is
       one read, and every space-separated edge number on
       it gets the read id recorded via add1marker2edge.
    An edge with a distinct twin shares its count (the sum
    of two numbers from the file) with the twin; an edge
    without one uses twice its own number. Counts of 255
    or more are stored as 255 and get no marker storage.

    Changes from the earlier revision: the file names are
    built with snprintf, the counts are read with "%u" to
    match their unsigned type, and a count file that ends
    early or contains a non-number now makes the function
    return 0 instead of continuing with whatever value was
    left in the variable.
Input:
    1. graphfile:       the input prefix
Output:
    None.
Return:
    1 on success; 0 if either file cannot be opened, the
    file name does not fit, or the count file is malformed.
*************************************************/
boolean loadPath ( char *graphfile )
{
  FILE *fp;
  char name[256], line[1024];
  unsigned int i, bal_ed, num1, edgeno, num2;
  long long markCounter = 0, readid = 0;
  char *seg;
  int nameLen;

  nameLen = snprintf ( name, sizeof ( name ), "%s.markOnEdge", graphfile );

  if ( nameLen < 0 || ( size_t ) nameLen >= sizeof ( name ) )
    {
      return 0;
    }

  fp = fopen ( name, "r" );

  if ( !fp )
    {
      return 0;
    }

  for ( i = 1; i <= num_ed; i++ )
    {
      edge_array[i].multi = 0;
    }

  for ( i = 1; i <= num_ed; i++ )
    {
      if ( fscanf ( fp, "%u", &num1 ) != 1 )
        {
          fprintf ( stderr, "%s is truncated or malformed.\n", name );
          fclose ( fp );
          return 0;
        }

      if ( EdSmallerThanTwin ( i ) )
        {
          if ( fscanf ( fp, "%u", &num2 ) != 1 )
            {
              fprintf ( stderr, "%s is truncated or malformed.\n", name );
              fclose ( fp );
              return 0;
            }

          bal_ed = getTwinEdge ( i );

          if ( num1 + num2 >= 255 )
            {
              edge_array[i].multi = 255;
              edge_array[bal_ed].multi = 255;
            }
          else
            {
              edge_array[i].multi = num1 + num2;
              edge_array[bal_ed].multi = num1 + num2;
              markCounter += 2 * ( num1 + num2 );
            }

          /* the twin was filled in together with this edge */
          i++;
        }
      else
        {
          if ( 2 * num1 >= 255 )
            {
              edge_array[i].multi = 255;
            }
          else
            {
              edge_array[i].multi = 2 * num1;
              markCounter += 2 * num1;
            }
        }
    }

  fclose ( fp );
  fprintf ( stderr, "%lld markers overall.\n", markCounter );
  markersArray = ( long long * ) ckalloc ( markCounter * sizeof ( long long ) );
  markCounter = 0;

  /* hand every edge its slice; multi is reset and refilled while loading */
  for ( i = 1; i <= num_ed; i++ )
    {
      if ( edge_array[i].multi == 255 )
        {
          continue;
        }

      edge_array[i].markers = markersArray + markCounter;
      markCounter += edge_array[i].multi;
      edge_array[i].multi = 0;
    }

  /* ".path" is shorter than ".markOnEdge", so this cannot be truncated */
  snprintf ( name, sizeof ( name ), "%s.path", graphfile );
  fp = fopen ( name, "r" );

  if ( !fp )
    {
      return 0;
    }

  while ( fgets ( line, sizeof ( line ), fp ) != NULL )
    {
      readid++;
      seg = strtok ( line, " " );

      while ( seg )
        {
          edgeno = atoi ( seg );
          add1marker2edge ( edgeno, readid );
          seg = strtok ( NULL, " " );
        }
    }

  fclose ( fp );
  markCounter = 0;

  for ( i = 1; i <= num_ed; i++ )
    {
      if ( edge_array[i].multi == 255 )
        {
          continue;
        }

      markCounter += edge_array[i].multi;
    }

  fprintf ( stderr, "%lld marks loaded.\n", markCounter );
  return 1;
}
예제 #20
0
/*
        -       -
          > - <
        -       -

   Tests whether the repeat at `edgeno` (several edges entering, the same
   number leaving, as sketched above) can be resolved.

   Returns 0 when any check fails; otherwise returns the number of left
   branches recorded in lefts[].  As a side effect the function fills the
   arrays rights[], lefts[] and gothrough[][] (all defined outside this
   function); an early return may leave them partially filled.

   NOTE(review): arcCounts and cntByReads are defined elsewhere; the
   comments below presume arcCounts returns the head of the arc list and
   stores an arc count through its second argument, and that cntByReads
   counts reads spanning left -> edgeno -> right -- confirm.
*/
unsigned int solvable ( unsigned int edgeno )
{
	// an edge that is its own twin, or whose multi is 255, is never resolved
	if ( EdSameAsTwin ( edgeno ) || edge_array[edgeno].multi == 255 )
		{ return 0; }

	unsigned int bal_ed = getTwinEdge ( edgeno );
	unsigned int arcRight_n, arcLeft_n;
	unsigned int counter;
	unsigned int i, j;
	unsigned int branch, bal_branch;
	ARC * parcL, *parcR;
	parcL = arcCounts ( bal_ed, &arcLeft_n );

	// fewer than two arcs on the twin side: nothing to resolve
	if ( arcLeft_n < 2 )
		{ return 0; }

	parcR = arcCounts ( edgeno, &arcRight_n );

	// both sides must report the same number of arcs
	if ( arcLeft_n != arcRight_n )
		{ return 0; }

	// check each right branch only has one upstream connection
	/*
	if(edgeno==2551){
	    for(i=0;i<arcLeft_n;i++)
	        printf("%d,",lefts[i]);
	    printf("__left to %d\n",edgeno);
	    for(j=0;j<arcRight_n;j++)
	        printf("%d,",rights[j]);
	    printf("__right to %d\n",edgeno);
	}
	*/
	// arcRight_n is recounted here: arcs with to_ed == 0 are skipped
	arcRight_n = 0;

	while ( parcR )
	{
		if ( parcR->to_ed == 0 )
		{
			parcR = parcR->next;
			continue;
		}

		branch = parcR->to_ed;

		if ( EdSameAsTwin ( branch ) || edge_array[branch].multi == 255 )
		{
			return 0;
		}

		rights[arcRight_n++] = branch;
		bal_branch = getTwinEdge ( branch );
		arcCounts ( bal_branch, &counter );

		// the count reported for the branch's twin must be exactly 1
		if ( counter != 1 )
		{
			return 0;
		}

		parcR = parcR->next;
	}

	// check if each left branch only has one downstream connection
	// arcLeft_n is recounted the same way; lefts[] stores the twin of each arc target
	arcLeft_n = 0;

	while ( parcL )
	{
		if ( parcL->to_ed == 0 )
		{
			parcL = parcL->next;
			continue;
		}

		branch = parcL->to_ed;

		if ( EdSameAsTwin ( branch ) || edge_array[branch].multi == 255 )
			{ return 0; }

		bal_branch = getTwinEdge ( branch );
		lefts[arcLeft_n++] = bal_branch;
		arcCounts ( bal_branch, &counter );

		if ( counter != 1 )
			{ return 0; }

		parcL = parcL->next;
	}

	// check if reads indicate a one-to-one connection between upstream and downstream edges

	// every left branch must pair with exactly one right branch ...
	for ( i = 0; i < arcLeft_n; i++ )
	{
		counter = 0;

		for ( j = 0; j < arcRight_n; j++ )
		{
			gothrough[i][j] = cntByReads ( lefts[i], edgeno, rights[j] ) == 0 ? 0 : 1;
			counter += gothrough[i][j];

			if ( counter > 1 )
				{ return 0; }
		}

		if ( counter != 1 )
			{ return 0; }
	}

	// ... and every right branch with exactly one left branch
	for ( j = 0; j < arcRight_n; j++ )
	{
		counter = 0;

		for ( i = 0; i < arcLeft_n; i++ )
			{ counter += gothrough[i][j]; }

		if ( counter != 1 )
			{ return 0; }
	}

	return arcLeft_n;
}
/*
 * Decides whether the path starting at `edgeid` is an unreliable tip.
 *
 * The edge must be non-zero, must differ from its twin, and arcCount must
 * report no arcs for the twin; otherwise 0 is returned.  The path is then
 * followed arc by arc while arcCount reports at most one arc for the
 * current edge and for its twin, adding up edge lengths:
 *   - reaching cutLen or more returns 0;
 *   - running out of arcs returns 1 (caseA is incremented);
 *   - hitting a branch on the very first step, or with no arcs reported
 *     for the current twin, returns 0.
 * When the walk stops at a branch after at least one step, the tip is
 * unreliable if the last arc followed has multiplicity 1 (caseB), or if
 * some arc of the twin of the branching edge has a larger multiplicity
 * than the last arc followed (caseC).
 *
 * edgeid: first edge of the candidate tip.
 * cutLen: length at which a path stops being considered a tip.
 * Returns 1 for an unreliable tip, 0 otherwise.
 *
 * Fix: a NULL activeArc was reported with printf and then dereferenced.
 * The function now returns 0 right after the report.
 */
boolean isUnreliableTip_strict (unsigned int edgeid, int cutLen)
{
    unsigned int arcRight_n, arcLeft_n;
    unsigned int bal_ed;
    unsigned int currentEd = edgeid;
    int length = 0;
    unsigned int mult = 0;
    ARC *arc, *activeArc = NULL, *tempArc;

    if (edgeid == 0)
    {
        return 0;
    }

    bal_ed = getTwinEdge (edgeid);

    if (bal_ed == edgeid)
    {
        return 0;
    }

    arcCount (bal_ed, &arcLeft_n);

    /* a tip has nothing attached on the twin side of its first edge */
    if (arcLeft_n > 0)
    {
        return 0;
    }

    while (currentEd)
    {
        arcCount (bal_ed, &arcLeft_n);
        tempArc = arcCount (currentEd, &arcRight_n);

        if (arcLeft_n > 1 || arcRight_n > 1)
        {
            if (arcLeft_n == 0 || length == 0)
            {
                return 0;
            }
            else
            {
                break;
            }
        }

        length += edge_array[currentEd].length;

        if (length >= cutLen)
        {
            return 0;
        }

        if (tempArc)
        {
            activeArc = tempArc;
            currentEd = activeArc->to_ed;
            bal_ed = getTwinEdge (currentEd);
        }
        else
        {
            currentEd = 0;
        }
    }

    /* the path ended without meeting a branch */
    if (currentEd == 0)
    {
        caseA++;
        return 1;
    }

    if (!activeArc)
    {
        printf ("no activeArc while checking edge %d\n", edgeid);
        /* nothing to compare against: do not classify the edge as a tip */
        return 0;
    }

    if (activeArc->multiplicity == 1)
    {
        caseB++;
        return 1;
    }

    /* strongest arc among those of the twin of the branching edge */
    for (arc = edge_array[bal_ed].arcs; arc != NULL; arc = arc->next)
    {
        if (arc->multiplicity > mult)
        {
            mult = arc->multiplicity;
        }
    }

    if (mult > activeArc->multiplicity)
    {
        caseC++;
    }

    return mult > activeArc->multiplicity;
}
예제 #22
0
/*
 * Binary-path counterpart of loadPath.
 *
 * 1. Reads the per-edge marker counts from "<graphfile>.markOnEdge" (text)
 *    and reserves a slice of markersArray for every edge whose count is
 *    below 255.  An edge with a distinct twin shares its count (the sum of
 *    two numbers from the file) with the twin; an edge without one uses
 *    twice its own number.  Counts of 255 or more are stored as 255.
 * 2. Reads "<graphfile>.path" (binary): each record is one length byte
 *    followed by that many unsigned int edge numbers and represents one
 *    read; every edge number gets the read id recorded via add1marker2edge.
 * 3. Sorts the markers of every edge that has between 2 and 254 of them
 *    with comp.
 *
 * graphfile: the input prefix.
 * Returns 1 on success; 0 if either file cannot be opened, the file name
 * does not fit, or the count file is malformed.
 *
 * Fixes:
 *   - A record whose length byte exceeds the capacity of the read buffer
 *     (maxReadLen - overlaplen + 1 entries) used to overflow the buffer;
 *     reading now stops there, as it already did for a short record.
 *   - The counts are read with "%u" and the fscanf result is checked.
 *   - The file names are built with snprintf instead of sprintf.
 *   - The sort loop started at index 0, a slot that no other loop in this
 *     function initialises; it now starts at 1 like all the others.
 */
boolean loadPathBin ( char *graphfile )
{
  FILE *fp;
  char name[256];
  unsigned int i, bal_ed, num1, num2;
  long long markCounter = 0, readid = 0;
  unsigned char seg, ch;
  unsigned int *freadBuf;
  int nameLen;
  long long bufCap;

  nameLen = snprintf ( name, sizeof ( name ), "%s.markOnEdge", graphfile );

  if ( nameLen < 0 || ( size_t ) nameLen >= sizeof ( name ) )
    {
      return 0;
    }

  fp = fopen ( name, "r" );

  if ( !fp )
    {
      return 0;
    }

  for ( i = 1; i <= num_ed; i++ )
    {
      edge_array[i].multi = 0;
      edge_array[i].markers = NULL;
    }

  for ( i = 1; i <= num_ed; i++ )
    {
      if ( fscanf ( fp, "%u", &num1 ) != 1 )
        {
          fprintf ( stderr, "%s is truncated or malformed.\n", name );
          fclose ( fp );
          return 0;
        }

      if ( EdSmallerThanTwin ( i ) )
        {
          if ( fscanf ( fp, "%u", &num2 ) != 1 )
            {
              fprintf ( stderr, "%s is truncated or malformed.\n", name );
              fclose ( fp );
              return 0;
            }

          bal_ed = getTwinEdge ( i );

          if ( num1 + num2 >= 255 )
            {
              edge_array[i].multi = 255;
              edge_array[bal_ed].multi = 255;
            }
          else
            {
              edge_array[i].multi = num1 + num2;
              edge_array[bal_ed].multi = num1 + num2;
              markCounter += 2 * ( num1 + num2 );
            }

          /* the twin was filled in together with this edge */
          i++;
        }
      else
        {
          if ( 2 * num1 >= 255 )
            {
              edge_array[i].multi = 255;
            }
          else
            {
              edge_array[i].multi = 2 * num1;
              markCounter += 2 * num1;
            }
        }
    }

  fclose ( fp );
  fprintf ( stderr, "%lld markers overall.\n", markCounter );
  markersArray = ( long long * ) ckalloc ( markCounter * sizeof ( long long ) );
  markCounter = 0;

  /* hand every edge its slice; multi is reset and refilled while loading */
  for ( i = 1; i <= num_ed; i++ )
    {
      if ( edge_array[i].multi == 255 )
        {
          continue;
        }

      edge_array[i].markers = markersArray + markCounter;
      markCounter += edge_array[i].multi;
      edge_array[i].multi = 0;
    }

  /* ".path" is shorter than ".markOnEdge", so this cannot be truncated */
  snprintf ( name, sizeof ( name ), "%s.path", graphfile );
  fp = fopen ( name, "rb" );

  if ( !fp )
    {
      return 0;
    }

  bufCap = ( long long ) maxReadLen - overlaplen + 1;
  freadBuf = ( unsigned int * ) ckalloc ( ( maxReadLen - overlaplen + 1 ) * sizeof ( unsigned int ) );

  while ( fread ( &ch, sizeof ( char ), 1, fp ) == 1 )
    {
      /* a record longer than the buffer would overflow it */
      if ( ( long long ) ch > bufCap )
        {
          fprintf ( stderr, "%s: a record with %d entries exceeds the read buffer, loading stopped.\n", name, ( int ) ch );
          break;
        }

      if ( fread ( freadBuf, sizeof ( unsigned int ), ch, fp ) != ch )
        {
          break;
        }

      readid++;

      for ( seg = 0; seg < ch; seg++ )
        {
          add1marker2edge ( freadBuf[seg], readid );
        }
    }

  fclose ( fp );
  markCounter = 0;

  for ( i = 1; i <= num_ed; i++ )
    {
      if ( edge_array[i].multi == 255 )
        {
          continue;
        }

      markCounter += edge_array[i].multi;
    }

  for ( i = 1; i <= num_ed; i++ )
    {
      if ( edge_array[i].multi >= 2 && edge_array[i].multi != 255 )
        {
          qsort ( edge_array[i].markers, ( int ) edge_array[i].multi, sizeof ( long long ), comp );
        }
    }

  fprintf ( stderr, "%lld markers loaded.\n", markCounter );
  free ( ( void * ) freadBuf );
  return 1;
}
예제 #23
0
//Concatenate pairs of edges that are linearly linked.
//
//Laps over all edges are repeated until a lap merges nothing.  Within a lap,
//an edge `head` (not deleted, not its own twin) is merged with `tail` when
//head has exactly one arc, that arc leads to tail, twin(tail) has exactly
//one arc, tail is not its own twin, and head is neither tail nor twin(tail).
//The merge is applied to the edge pair and then, mirrored, to the twin pair.
//After each lap the number of merges is printed; at the end, the number of
//non-deleted, non-self-twin edges with positive length seen in the last lap.
void linearConcatenate ()
{
	unsigned int e;
	unsigned int head, tail, tailTwin, headTwin;
	ARC *outArc, *backArc;
	int merged;
	int liveEdges;

	do
	{
		merged = 0;
		liveEdges = 0;

		for (e = 1; e <= num_ed; e++)
		{
			if (edge_array[e].deleted || EdSameAsTwin (e))
			{
				continue;
			}

			if (edge_array[e].length > 0)
			{
				liveEdges++;
			}

			//exactly one arc must leave this edge
			outArc = edge_array[e].arcs;

			if (!outArc || outArc->next)
			{
				continue;
			}

			//and exactly one arc must leave the twin of its target
			tail = outArc->to_ed;
			tailTwin = getTwinEdge (tail);
			backArc = edge_array[tailTwin].arcs;

			if (tailTwin == tail || !backArc || backArc->next)
			{
				continue;
			}

			head = e;

			if (head == tail || head == tailTwin)
			{
				continue;
			}

			//linear connection found: merge tail into head, then mirror it on the twins
			merged++;
			linearUpdateConnection (head, tail, 0);
			allpathUpdateEdge (head, tail, 0);
			headTwin = getTwinEdge (head);
			linearUpdateConnection (tailTwin, headTwin, 1);
			allpathUpdateEdge (tailTwin, headTwin, 1);
		}

		printf ("a linear concatenation lap, %d concatenated\n", merged);
	}
	while (merged);

	printf ("%d edges in graph\n", liveEdges);
}
/*
 * Deletes contigs whose coverage is low compared with their neighbourhood.
 *
 * For every edge that is not its own twin, computeNextCov is evaluated for
 * the edge and for its twin; if either value is 0 the edge is skipped.
 * With max / min the larger / smaller of the two values and curr_cov the
 * edge's cvg, a "contig_cov" line is printed and the edge is removed with
 * delete1contig when
 *   - min / max < 0.1 and curr_cov / min < 0.5, or
 *   - min / max >= 0.1 and curr_cov / max < 0.05.
 * After an edge has been evaluated the next index is skipped.  Finally
 * removeArc is run.
 *
 * Returns 1 if removeArc returned a positive value, 0 otherwise.
 */
int deleteLightContig()
{
    double next_cov, prev_cov;
    double max, min, curr_cov;
    unsigned int index;
    int change;
    int tooLight;

    for(index=1; index<=num_ed; index++)
    {
        if(EdSameAsTwin(index))
            continue;

        computeNextCov(index,&next_cov);
        computeNextCov(getTwinEdge(index),&prev_cov);

        if(next_cov ==0 || prev_cov ==0)
            continue;

        max = (next_cov > prev_cov) ? next_cov : prev_cov;
        min = (next_cov > prev_cov) ? prev_cov : next_cov;

        curr_cov = (double)edge_array[index].cvg;
        printf("contig_cov:\t%0.1f\t%0.1f\t%0.1f\n",curr_cov,max,min);

        /* very uneven neighbours: judge against the weaker side;
           otherwise judge against the stronger one */
        if(min / max <0.1)
            tooLight = (curr_cov /min < 0.5);
        else
            tooLight = (curr_cov / max <0.05);

        if(tooLight)
            delete1contig(index);

        index++;
    }

    change = removeArc();
    return change>0;
}
예제 #25
0
Idx VoronoiDiagram<CoordT>::getStartVertex(Idx edge) const {
	// An edge record stores only its end vertex, so the start vertex is
	// read from the end vertex of the twin edge.
	const Idx twin = getTwinEdge(edge);
	return edges_[twin].end_vertex;
}
예제 #26
0
/*
 * Writes "<outfile>.contig" and "<outfile>.ContigIndex" and prints contig
 * length statistics.  Unlike a plain in-order dump, the edges are emitted
 * in the order given by flag_array, which is built here from the edge
 * lengths; ed_array itself is not reordered.
 *
 * ed_array: edge array, indexed from 1.
 * ed_num:   number of edges in ed_array.
 * outfile:  prefix of both output file names.
 * cut_len:  only echoed in the final summary line.
 *
 * index_array and flag_array are allocated here and not freed in this
 * function; they are not declared in this function.
 * NOTE(review): uniqueLenSearch is defined elsewhere; it presumably maps a
 * length to a unique output position using the distinct-length table and
 * flag_array -- confirm before relying on the ordering described above.
 * NOTE(review): the last loop reads the globals num_ed and edge_array
 * rather than the ed_num / ed_array parameters.
 */
void output_contig (EDGE * ed_array, unsigned int ed_num, char *outfile, int cut_len) 
{
	char temp[256];

	FILE * fp, *fp_contig;
	int flag, count, len_c;
	int signI;
	unsigned int i, j, diff_len=0;
	long long sum = 0, N90, N50;
	unsigned int *length_array;

	boolean tip;
	sprintf (temp, "%s.contig", outfile);
	fp = ckopen (temp, "w");

	index_array = (unsigned int *)ckalloc((ed_num+1)*sizeof(unsigned int));
	unsigned int * all_length_arr = (unsigned int*) ckalloc((ed_num+1)*sizeof(unsigned int));
	flag_array = (unsigned int*)ckalloc((ed_num+1)*sizeof(unsigned int));

	/* start with every edge's length in both working arrays */
	for (i=1; i<=ed_num; ++i)
	{
		index_array[i] = ed_array[i].length;
		all_length_arr[i] = ed_array[i].length;
	}

	qsort(&all_length_arr[1], ed_num, sizeof(all_length_arr[0]), cmp_int);

	/* collapse runs of equal lengths: all_length_arr[1..diff_len] keeps one
	   entry per distinct length, flag_array[k] the position in the sorted
	   array at which that length's run starts */
	for (i=1; i<=ed_num; ++i)
	{
		for (j=i+1; j<=ed_num; ++j)
		{
			if (all_length_arr[i] != all_length_arr[j])
				break;
		}
		all_length_arr[++diff_len] = all_length_arr[i];
		flag_array[diff_len] = i;
		i = j-1;

	}

	/* replace each edge's length by the value uniqueLenSearch returns for it */
	for (i=1; i<=ed_num; ++i)
	{
		index_array[i] = uniqueLenSearch(all_length_arr, flag_array, diff_len, index_array[i]);
	}

	/* invert the mapping: flag_array[position] = edge index */
	for (i=1; i<=ed_num; ++i)
	{
		flag_array[index_array[i]] = i;
	}

	free((void*)all_length_arr);

	length_array = (unsigned int *) ckalloc (ed_num * sizeof (unsigned int));
	kmerSeq = (char *) ckalloc (overlaplen * sizeof (char));

	/* first scan: collect the lengths that reach len_bar and count the
	   edges that will be written */
	count = len_c = 0;
	for (i = 1; i <= ed_num; i++)
	{
		if ((ed_array[i].length + overlaplen) >= len_bar)	
		{
			length_array[len_c++] = ed_array[i].length + overlaplen;
		}
		if (ed_array[i].length < 1 || ed_array[i].deleted)	
		{
			continue;
		}
		count++;
		if (EdSmallerThanTwin (i))		
		{
			i++;
		}
	}
	sum = 0;
	for (signI = len_c - 1; signI >= 0; signI--)	
	{
		sum += length_array[signI];
	}
	
	qsort ( length_array, len_c, sizeof ( length_array[0] ), cmp_int );

	/* the summary is printed only when at least one length was collected */
	if ( len_c > 0 )
	{
		printf ( "%d ctgs longer than %d, sum up %lldbp, with average length %lld\n", len_c, len_bar, sum, sum / len_c );
		printf ( "the longest is %dbp, ", length_array[len_c - 1] );
	}
	
	N50 = sum * 0.5;
	N90 = sum * 0.9;
	sum = flag = 0;
	/* accumulate from the end of the sorted array until the N50 / N90
	   thresholds are reached */
	for (signI = len_c - 1; signI >= 0; signI--)
	{
		sum += length_array[signI];
		if (!flag && sum >= N50)
		{
			printf ("contig N50 is %d bp,", length_array[signI]);
			flag = 1;
		}
		if (sum >= N90)	
		{
			printf ("contig N90 is %d bp\n", length_array[signI]);
			break;
		}
	}
	
	/* write the contigs: position i of the output is edge j = flag_array[i] */
	for (i = 1; i <= ed_num; i++)
	{
		j = flag_array[i];
		if (ed_array[j].deleted || ed_array[j].length < 1)	
		{
			continue;
		}
		/* an edge counts as a tip unless it has arcs on both strands */
		if (ed_array[j].arcs && ed_array[getTwinEdge (j)].arcs)
		{
			tip = 0;
		}
		
		else	
		{
			tip = 1;
		}
		output_1contig (i, &(ed_array[j]), fp, tip);
		if (EdSmallerThanTwin (j))		
		{
			i++;
		}
	}

	fclose (fp);
	free ((void *) kmerSeq);
	free ((void *) length_array);
	printf ("%d contigs longer than %d output\n", count, cut_len);
	sprintf (temp, "%s.ContigIndex", outfile);
	fp_contig = ckopen (temp, "w");
	fprintf (fp_contig, "Edge_num %d %d\n", ed_num, count);
	fprintf (fp_contig, "index\tlength\treverseComplement\n");

	/* index file: one line per output position with the length and a
	   1 / -1 / 0 code taken from EdSmallerThanTwin / EdLargerThanTwin */
	for (i = 1; i <= num_ed; i++)
	{
		j = flag_array[i];
		fprintf (fp_contig, "%d\t%d\t", i, edge_array[j].length + overlaplen);
		if (EdSmallerThanTwin (j))
		{
			fprintf (fp_contig, "1\n");
			i++;
		}
		
		else if (EdLargerThanTwin (j))
		{
			fprintf (fp_contig, "-1\n");
		}
		
		else
		{
			fprintf (fp_contig, "0\n");
		}
	}
	fclose (fp_contig);
}