/*
 * Dump every edge of the global edge_array to "<outfile>.updated.edge".
 *
 * The file starts with a header line "EDGEs <num_ed>". It is followed by one
 * record per edge i in 1..num_ed holding, in this order: the edge length,
 * the k-mer of its from-vertex, the k-mer of its to-vertex (both written by
 * print_kmer, which is handed ',' as its third argument), a strand marker
 * (1 if EdSmallerThanTwin(i), -1 if EdLargerThanTwin(i), 0 otherwise) and
 * the edge's cvg field.
 *
 * outfile: prefix of the output file name. If the resulting name does not
 *          fit into the local buffer, an error is reported on stderr and
 *          nothing is written.
 */
void output_updated_edges ( char * outfile )
{
	FILE * fp;
	char name[256];
	unsigned int i;
	EDGE * edge;
	int nameLen;

	/* bounded formatting: a long prefix must not overrun name[] */
	nameLen = snprintf ( name, sizeof ( name ), "%s.updated.edge", outfile );

	if ( nameLen < 0 || ( size_t ) nameLen >= sizeof ( name ) )
	{
		fprintf ( stderr, "output_updated_edges: output prefix is too long\n" );
		return;
	}

	fp = ckopen ( name, "w" );
	/* every edge 1..num_ed is written below, so the header count is simply num_ed */
	fprintf ( fp, "EDGEs %u\n", ( unsigned int ) num_ed );

	for ( i = 1; i <= num_ed; i++ )
	{
		edge = &edge_array[i];
		fprintf ( fp, ">length %d,", edge->length );
		print_kmer ( fp, vt_array[edge->from_vt].kmer, ',' );
		print_kmer ( fp, vt_array[edge->to_vt].kmer, ',' );

		if ( EdSmallerThanTwin ( i ) )
			{ fprintf ( fp, "1," ); }
		else if ( EdLargerThanTwin ( i ) )
			{ fprintf ( fp, "-1," ); }
		else
			{ fprintf ( fp, "0," ); }

		fprintf ( fp, "%d\n", edge->cvg );
	}

	fclose ( fp );
}
/*
 * Walk over all edges and split every edge that solvable() reports as a
 * resolvable repeat, unless interferingCheck() vetoes it.
 *
 * extraEdgeNum is set to num_ed + 1 before the scan and num_ed is set to
 * extraEdgeNum - 1 afterwards, so any change made to extraEdgeNum during
 * the scan (presumably by split1edge -- confirm) becomes the new edge count.
 * Finally dead arcs are removed and the global markersArray is released.
 */
void solveReps()
{
	unsigned int edgeIdx = 1;
	unsigned int copies;
	int solvedCount = 0;
	boolean blocked;

	extraEdgeNum = num_ed + 1;

	while ( edgeIdx <= num_ed )
	{
		copies = solvable ( edgeIdx );

		if ( copies != 0 )
		{
			blocked = interferingCheck ( edgeIdx, copies );

			if ( !blocked )
			{
				split1edge ( edgeIdx, copies );
				solvedCount++;

				/* the edge after a "smaller" edge is skipped as well */
				if ( EdSmallerThanTwin ( edgeIdx ) )
					{ edgeIdx++; }
			}
		}

		edgeIdx++;
	}

	printf ( "%d repeats solvable, %d more edges\n", solvedCount, extraEdgeNum - 1 - num_ed );
	num_ed = extraEdgeNum - 1;
	removeDeadArcs();

	if ( markersArray != NULL )
	{
		free ( ( void * ) markersArray );
		markersArray = NULL;
	}
}
/*
 * Sort the edges, print length statistics and write the contig files.
 *
 * Steps:
 *   1. ed_array[1..ed_num] is sorted in place with cmp_edge.
 *   2. Lengths (edge length + overlaplen) that reach len_bar are collected;
 *      their count, sum, average, maximum, N50 and N90 go to stdout.
 *   3. Every edge that is neither deleted nor shorter than 1 is handed to
 *      output_1contig and ends up in "<outfile>.contig". The edge following
 *      an EdSmallerThanTwin edge is skipped.
 *   4. "<outfile>.ContigIndex" receives one line per edge: index, length and
 *      a strand marker (1 / -1 / 0).
 *
 * ed_array: 1-based edge array (slot 0 is not touched here)
 * ed_num:   number of edges in ed_array
 * outfile:  prefix of both output file names; if a name would not fit the
 *           local buffers an error is reported and nothing is written
 * cut_len:  only echoed in the summary message
 *
 * NOTE(review): the index loop at the end iterates the globals num_ed and
 * edge_array instead of ed_num / ed_array -- fine as long as callers pass
 * exactly those globals; confirm.
 */
void output_contig ( EDGE * ed_array, unsigned int ed_num, char * outfile, int cut_len )
{
	char    temp[256], indexName[256];
	FILE * fp, *fp_contig;
	int flag, count, len_c;
	int signI;
	int contigNameLen, indexNameLen;
	unsigned int i;
	long long sum = 0, N90, N50;
	unsigned int * length_array;
	boolean tip;

	/* build both file names up front so that a too-long prefix is rejected
	   before any file is created */
	contigNameLen = snprintf ( temp, sizeof ( temp ), "%s.contig", outfile );
	indexNameLen = snprintf ( indexName, sizeof ( indexName ), "%s.ContigIndex", outfile );

	if ( contigNameLen < 0 || ( size_t ) contigNameLen >= sizeof ( temp )
	        || indexNameLen < 0 || ( size_t ) indexNameLen >= sizeof ( indexName ) )
	{
		fprintf ( stderr, "output_contig: output prefix is too long\n" );
		return;
	}

	fp = ckopen ( temp, "w" );
	qsort ( &ed_array[1], ed_num, sizeof ( EDGE ), cmp_edge );
	length_array = ( unsigned int * ) ckalloc ( ed_num * sizeof ( unsigned int ) );
	kmerSeq = ( char * ) ckalloc ( overlaplen * sizeof ( char ) );
	//first scan for number counting
	count = len_c = 0;

	for ( i = 1; i <= ed_num; i++ )
	{
		if ( ( ed_array[i].length + overlaplen ) >= len_bar )
			{ length_array[len_c++] = ed_array[i].length + overlaplen; }

		if ( ed_array[i].length < 1 || ed_array[i].deleted )
			{ continue; }

		count++;

		if ( EdSmallerThanTwin ( i ) )
			{ i++; }
	}

	sum = 0;

	for ( signI = len_c - 1; signI >= 0; signI-- )
		{ sum += length_array[signI]; }

	qsort ( length_array, len_c, sizeof ( length_array[0] ), cmp_int );

	/* both lines need at least one entry: the average divides by len_c and
	   the maximum reads length_array[len_c - 1] */
	if ( len_c > 0 )
	{
		printf ( "%d ctgs longer than %d, sum up %lldbp, with average length %lld\n", len_c, len_bar, sum, sum / len_c );
		printf ( "the longest is %ubp, ", length_array[len_c - 1] );
	}

	N50 = sum * 0.5;
	N90 = sum * 0.9;
	sum = flag = 0;

	/* accumulate from the longest contig downwards until the thresholds are hit */
	for ( signI = len_c - 1; signI >= 0; signI-- )
	{
		sum += length_array[signI];

		if ( !flag && sum >= N50 )
		{
			printf ( "contig N50 is %u bp,", length_array[signI] );
			flag = 1;
		}

		if ( sum >= N90 )
		{
			printf ( "contig N90 is %u bp\n", length_array[signI] );
			break;
		}
	}

	for ( i = 1; i <= ed_num; i++ )
	{
		if ( ed_array[i].deleted || ed_array[i].length < 1 )
			{ continue; }

		/* an edge is flagged as tip unless both it and its twin have arcs */
		if ( ed_array[i].arcs && ed_array[getTwinEdge ( i )].arcs )
			{ tip = 0; }
		else
			{ tip = 1; }

		output_1contig ( i, & ( ed_array[i] ), fp, tip );

		if ( EdSmallerThanTwin ( i ) )
			{ i++; }
	}

	fclose ( fp );
	free ( ( void * ) kmerSeq );
	free ( ( void * ) length_array );
	printf ( "%d contigs longer than %d output\n", count, cut_len );
	fp_contig = ckopen ( indexName, "w" );
	fprintf ( fp_contig, "Edge_num %u %d\n", ed_num, count );
	fprintf ( fp_contig, "index\tlength\treverseComplement\n" );

	for ( i = 1; i <= num_ed; i++ )
	{
		fprintf ( fp_contig, "%u\t%d\t", i, edge_array[i].length + overlaplen );

		if ( EdSmallerThanTwin ( i ) )
		{
			fprintf ( fp_contig, "1\n" );
			i++;
		}
		else if ( EdLargerThanTwin ( i ) )
			{ fprintf ( fp_contig, "-1\n" ); }
		else
			{ fprintf ( fp_contig, "0\n" ); }
	}

	fclose ( fp_contig );
}
/*
 * Exchange edge i with the edge stored right behind it (i + 1).
 *
 * The four copyEdge calls route the two edges through slots num_ed + 1 and
 * num_ed + 2 (presumably copyEdge(source, target) with those slots acting as
 * scratch space -- confirm against copyEdge). Afterwards edge i is marked
 * with bal_edge = 2, edge i + 1 with bal_edge = 0, and the to_ed field of
 * every balancing arc is pointed at the new position of the counterpart.
 */
static void swapTwinPair ( unsigned int i )
{
	ARC * arc;

	copyEdge ( i, num_ed + 1 + 1 );
	copyEdge ( i + 1, num_ed + 1 );
	copyEdge ( num_ed + 1, i );
	copyEdge ( num_ed + 1 + 1, i + 1 );
	edge_array[i].bal_edge = 2;
	edge_array[i + 1].bal_edge = 0;

	//take care of the arcs
	for ( arc = edge_array[i].arcs; arc; arc = arc->next )
		{ arc->bal_arc->to_ed = i + 1; }

	for ( arc = edge_array[i + 1].arcs; arc; arc = arc->next )
		{ arc->bal_arc->to_ed = i; }
}

/*
 * Normalise the order of every edge / twin pair in edge_array.
 *
 * For each non-deleted edge i for which EdSmallerThanTwin(i) holds, the
 * from-vertex k-mers of i and i + 1 are compared:
 *   - KmerLarger: the pair is swapped (swapTwinPair).
 *   - KmerEqual:  EdgeEqual(i, i + 1) decides. 0 means the two are treated
 *                 as one palindromic edge: i gets bal_edge = 1, i + 1 is
 *                 deleted and its arcs are prepended to the arc list of i.
 *                 A positive result swaps the pair.
 * Edges with EdSameAsTwin are skipped. Any other edge reached by the scan
 * is deleted with a warning. A summary is printed at the end.
 */
void swapedge()
{
	unsigned int i;
	ARC * arc, *bal_arc, *temp_arc;
	int count_swap = 0, count_equal = 0;

	for ( i = 1; i <= num_ed; ++i )
	{
		if ( edge_array[i].deleted || EdSameAsTwin ( i ) )
			{ continue; }

		if ( !EdSmallerThanTwin ( i ) )
		{
			delete1Edge ( i );
			printf ( "Warning : Front edge %u is larger than %u.\n", i, i + 1 );
			continue;
		}

		if ( KmerLarger ( vt_array[edge_array[i].from_vt].kmer, vt_array[edge_array[i + 1].from_vt].kmer ) )
		{
			count_swap++;
			swapTwinPair ( i );
		}
		else if ( KmerEqual ( vt_array[edge_array[i].from_vt].kmer, vt_array[edge_array[i + 1].from_vt].kmer ) )
		{
			int temp = EdgeEqual ( i, i + 1 );

			if ( temp == 0 )
			{
				count_equal++;
				edge_array[i].bal_edge = 1;
				delete1Edge ( i + 1 );
				//take care of the arcs
				arc = edge_array[i].arcs;

				while ( arc )
				{
					arc->bal_arc->to_ed = i;
					arc = arc->next;
				}

				/* move the arc list of i + 1 onto the front of i's list */
				bal_arc = edge_array[i + 1].arcs;
				edge_array[i + 1].arcs = NULL;

				while ( bal_arc )
				{
					temp_arc = bal_arc;
					bal_arc = bal_arc->next;

					if ( edge_array[i].arcs )
						{ edge_array[i].arcs->prev = temp_arc; }

					temp_arc->next = edge_array[i].arcs;
					edge_array[i].arcs = temp_arc;
				}
			}
			else if ( temp > 0 )
			{
				count_swap++;
				swapTwinPair ( i );
			}
		}

		/* the partner at i + 1 has been handled together with i */
		++i;
	}

	printf ( "%d none-palindrome edge(s) swapped, %d palindrome edge(s) processed.\n", count_swap, count_equal );
}
Exemple #5
0
/*************************************************
Function:
    loadPath
Description:
    1. Reads the per-edge marker counts from "<graphfile>.markOnEdge".
       An edge and its twin share the sum of their two counts; a single
       edge gets twice its count. Totals of 255 or more are flagged with
       multi = 255 and receive no marker storage.
    2. Allocates markersArray and gives every unflagged edge its slice.
    3. Reads "<graphfile>.path" (text): each line is one read, the read id
       is the 1-based line number, and every edge number on the line is
       passed to add1marker2edge together with that id.
Input:
    1. graphfile:       the input prefix
Output:
    None.
Return:
    1 if both files were loaded.
    0 if a file name is too long, a file cannot be opened, or the
    markOnEdge file holds fewer counts than expected (in that case the
    multi field of every edge is reset to 0).
*************************************************/
boolean loadPath ( char *graphfile )
{
  FILE *fp;
  char name[256], line[1024];
  unsigned int i, bal_ed, num1 = 0, edgeno, num2 = 0;
  long long markCounter = 0, readid = 0;
  char *seg;
  int nameLen;
  int truncated = 0;

  /* ".markOnEdge" is the longer of the two suffixes, so passing this
     check also guarantees that the ".path" name fits below */
  nameLen = snprintf ( name, sizeof ( name ), "%s.markOnEdge", graphfile );

  if ( nameLen < 0 || ( size_t ) nameLen >= sizeof ( name ) )
    {
      fprintf ( stderr, "loadPath: input prefix is too long\n" );
      return 0;
    }

  fp = fopen ( name, "r" );

  if ( !fp )
    {
      return 0;
    }

  for ( i = 1; i <= num_ed; i++ )
    {
      edge_array[i].multi = 0;
    }

  for ( i = 1; i <= num_ed; i++ )
    {
      /* a failed read would leave num1/num2 stale, so stop right away */
      if ( fscanf ( fp, "%u", &num1 ) != 1 )
        {
          truncated = 1;
          break;
        }

      if ( EdSmallerThanTwin ( i ) )
        {
          if ( fscanf ( fp, "%u", &num2 ) != 1 )
            {
              truncated = 1;
              break;
            }

          bal_ed = getTwinEdge ( i );

          if ( num1 + num2 >= 255 )
            {
              edge_array[i].multi = 255;
              edge_array[bal_ed].multi = 255;
            }
          else
            {
              edge_array[i].multi = num1 + num2;
              edge_array[bal_ed].multi = num1 + num2;
              markCounter += 2 * ( num1 + num2 );
            }

          /* the twin's count was consumed together with this edge */
          i++;
        }
      else
        {
          if ( 2 * num1 >= 255 )
            {
              edge_array[i].multi = 255;
            }
          else
            {
              edge_array[i].multi = 2 * num1;
              markCounter += 2 * num1;
            }
        }
    }

  fclose ( fp );

  if ( truncated )
    {
      fprintf ( stderr, "%s is truncated or malformed.\n", name );

      /* do not leave half-filled counts behind */
      for ( i = 1; i <= num_ed; i++ )
        {
          edge_array[i].multi = 0;
        }

      return 0;
    }

  fprintf ( stderr, "%lld markers overall.\n", markCounter );
  markersArray = ( long long * ) ckalloc ( markCounter * sizeof ( long long ) );
  markCounter = 0;

  /* hand out consecutive slices of markersArray; multi is reset to 0 here
     and re-counted afterwards (it is summed again at the end) */
  for ( i = 1; i <= num_ed; i++ )
    {
      if ( edge_array[i].multi == 255 )
        {
          continue;
        }

      edge_array[i].markers = markersArray + markCounter;
      markCounter += edge_array[i].multi;
      edge_array[i].multi = 0;
    }

  snprintf ( name, sizeof ( name ), "%s.path", graphfile );
  fp = fopen ( name, "r" );

  if ( !fp )
    {
      return 0;
    }

  /* NOTE(review): a line longer than sizeof(line) - 1 characters is read in
     several pieces and therefore counted as several reads */
  while ( fgets ( line, sizeof ( line ), fp ) != NULL )
    {
      readid++;
      seg = strtok ( line, " " );

      while ( seg )
        {
          edgeno = atoi ( seg );
          add1marker2edge ( edgeno, readid );
          seg = strtok ( NULL, " " );
        }
    }

  fclose ( fp );
  markCounter = 0;

  for ( i = 1; i <= num_ed; i++ )
    {
      if ( edge_array[i].multi == 255 )
        {
          continue;
        }

      markCounter += edge_array[i].multi;
    }

  fprintf ( stderr, "%lld marks loaded.\n", markCounter );
  return 1;
}
Exemple #6
0
/*
 * Binary counterpart of the text path loader.
 *
 * 1. Reads the per-edge marker counts from "<graphfile>.markOnEdge" (text).
 *    An edge and its twin share the sum of their two counts; a single edge
 *    gets twice its count. Totals of 255 or more are flagged with
 *    multi = 255 and receive no marker storage.
 * 2. Allocates markersArray and gives every unflagged edge its slice.
 * 3. Reads "<graphfile>.path" as a sequence of records: one count byte
 *    followed by that many unsigned ints. Each complete record is one read
 *    (ids start at 1) and every value in it is passed to add1marker2edge
 *    together with the read id. Reading stops at the first incomplete record.
 * 4. Sorts the markers of every edge holding 2..254 of them with comp.
 *
 * Returns 1 on success. Returns 0 if a file name is too long, a file cannot
 * be opened, or the markOnEdge file holds fewer counts than expected (in
 * that case the multi field of every edge is reset to 0).
 */
boolean loadPathBin ( char *graphfile )
{
  /* a record's count is a single byte, so this many entries always suffice */
  enum { MAX_RECORD_ITEMS = ( unsigned char ) -1 };
  FILE *fp;
  char name[256];
  unsigned int i, bal_ed, num1 = 0, num2 = 0;
  long long markCounter = 0, readid = 0;
  unsigned char seg, ch;
  unsigned int freadBuf[MAX_RECORD_ITEMS];
  int nameLen;
  int truncated = 0;

  /* ".markOnEdge" is the longer of the two suffixes, so passing this
     check also guarantees that the ".path" name fits below */
  nameLen = snprintf ( name, sizeof ( name ), "%s.markOnEdge", graphfile );

  if ( nameLen < 0 || ( size_t ) nameLen >= sizeof ( name ) )
    {
      fprintf ( stderr, "loadPathBin: input prefix is too long\n" );
      return 0;
    }

  fp = fopen ( name, "r" );

  if ( !fp )
    {
      return 0;
    }

  for ( i = 1; i <= num_ed; i++ )
    {
      edge_array[i].multi = 0;
      edge_array[i].markers = NULL;
    }

  for ( i = 1; i <= num_ed; i++ )
    {
      /* a failed read would leave num1/num2 stale, so stop right away */
      if ( fscanf ( fp, "%u", &num1 ) != 1 )
        {
          truncated = 1;
          break;
        }

      if ( EdSmallerThanTwin ( i ) )
        {
          if ( fscanf ( fp, "%u", &num2 ) != 1 )
            {
              truncated = 1;
              break;
            }

          bal_ed = getTwinEdge ( i );

          if ( num1 + num2 >= 255 )
            {
              edge_array[i].multi = 255;
              edge_array[bal_ed].multi = 255;
            }
          else
            {
              edge_array[i].multi = num1 + num2;
              edge_array[bal_ed].multi = num1 + num2;
              markCounter += 2 * ( num1 + num2 );
            }

          /* the twin's count was consumed together with this edge */
          i++;
        }
      else
        {
          if ( 2 * num1 >= 255 )
            {
              edge_array[i].multi = 255;
            }
          else
            {
              edge_array[i].multi = 2 * num1;
              markCounter += 2 * num1;
            }
        }
    }

  fclose ( fp );

  if ( truncated )
    {
      fprintf ( stderr, "%s is truncated or malformed.\n", name );

      /* do not leave half-filled counts behind */
      for ( i = 1; i <= num_ed; i++ )
        {
          edge_array[i].multi = 0;
        }

      return 0;
    }

  fprintf ( stderr, "%lld markers overall.\n", markCounter );
  markersArray = ( long long * ) ckalloc ( markCounter * sizeof ( long long ) );
  markCounter = 0;

  /* hand out consecutive slices of markersArray; multi is reset to 0 here
     and re-counted afterwards (it is summed again at the end) */
  for ( i = 1; i <= num_ed; i++ )
    {
      if ( edge_array[i].multi == 255 )
        {
          continue;
        }

      edge_array[i].markers = markersArray + markCounter;
      markCounter += edge_array[i].multi;
      edge_array[i].multi = 0;
    }

  snprintf ( name, sizeof ( name ), "%s.path", graphfile );
  fp = fopen ( name, "rb" );

  if ( !fp )
    {
      return 0;
    }

  while ( fread ( &ch, sizeof ( char ), 1, fp ) == 1 )
    {
      /* ch <= MAX_RECORD_ITEMS by construction, so freadBuf cannot overflow */
      if ( fread ( freadBuf, sizeof ( unsigned int ), ch, fp ) != ch )
        {
          break;
        }

      readid++;

      for ( seg = 0; seg < ch; seg++ )
        {
          add1marker2edge ( freadBuf[seg], readid );
        }
    }

  fclose ( fp );
  markCounter = 0;

  for ( i = 1; i <= num_ed; i++ )
    {
      if ( edge_array[i].multi == 255 )
        {
          continue;
        }

      markCounter += edge_array[i].multi;
    }

  /* edges are 1-based throughout; slot 0 is never initialised here and
     must not be sorted */
  for ( i = 1; i <= num_ed; i++ )
    {
      if ( edge_array[i].multi >= 2 && edge_array[i].multi != 255 )
        {
          qsort ( edge_array[i].markers, ( int ) edge_array[i].multi, sizeof ( long long ), comp );
        }
    }

  fprintf ( stderr, "%lld markers loaded.\n", markCounter );
  return 1;
}
/*
 * Print length statistics and write the contig files, emitting the contigs
 * in the order given by a length ranking instead of sorting ed_array itself.
 *
 * Steps:
 *   1. Build the ranking: the edge lengths are sorted and de-duplicated,
 *      uniqueLenSearch maps every edge to a rank (stored in the global
 *      index_array), and the global flag_array is then filled as the
 *      inverse mapping rank -> edge.
 *      NOTE(review): this relies on uniqueLenSearch returning a distinct
 *      rank in 1..ed_num per call -- confirm against its definition.
 *   2. Lengths (edge length + overlaplen) that reach len_bar are collected;
 *      their count, sum, average, maximum, N50 and N90 go to stdout.
 *   3. For every rank i the edge flag_array[i] is handed to output_1contig
 *      (with i as its id) unless it is deleted or shorter than 1; the
 *      result is "<outfile>.contig". The rank following an
 *      EdSmallerThanTwin edge is skipped.
 *   4. "<outfile>.ContigIndex" receives one line per rank: index, length
 *      and a strand marker (1 / -1 / 0).
 *
 * ed_array: 1-based edge array (slot 0 is not touched here)
 * ed_num:   number of edges in ed_array
 * outfile:  prefix of both output file names; if a name would not fit the
 *           local buffers an error is reported and nothing is written
 * cut_len:  only echoed in the summary message
 *
 * index_array and flag_array are allocated here and not released by this
 * function.
 *
 * NOTE(review): the index loop at the end iterates the globals num_ed and
 * edge_array instead of ed_num / ed_array -- fine as long as callers pass
 * exactly those globals; confirm.
 */
void output_contig (EDGE * ed_array, unsigned int ed_num, char *outfile, int cut_len)
{
	char temp[256], indexName[256];

	FILE * fp, *fp_contig;
	int flag, count, len_c;
	int signI;
	int contigNameLen, indexNameLen;
	unsigned int i, j, diff_len=0;
	long long sum = 0, N90, N50;
	unsigned int *length_array;

	boolean tip;

	/* build both file names up front so that a too-long prefix is rejected
	   before any file is created or any global is touched */
	contigNameLen = snprintf (temp, sizeof (temp), "%s.contig", outfile);
	indexNameLen = snprintf (indexName, sizeof (indexName), "%s.ContigIndex", outfile);
	if (contigNameLen < 0 || (size_t) contigNameLen >= sizeof (temp)
		|| indexNameLen < 0 || (size_t) indexNameLen >= sizeof (indexName))
	{
		fprintf (stderr, "output_contig: output prefix is too long\n");
		return;
	}
	fp = ckopen (temp, "w");

	index_array = (unsigned int *)ckalloc((ed_num+1)*sizeof(unsigned int));
	unsigned int * all_length_arr = (unsigned int*) ckalloc((ed_num+1)*sizeof(unsigned int));
	flag_array = (unsigned int*)ckalloc((ed_num+1)*sizeof(unsigned int));

	for (i=1; i<=ed_num; ++i)
	{
		index_array[i] = ed_array[i].length;
		all_length_arr[i] = ed_array[i].length;
	}

	qsort(&all_length_arr[1], ed_num, sizeof(all_length_arr[0]), cmp_int);

	/* compact the sorted lengths to distinct values; flag_array[k] keeps the
	   position at which the k-th distinct value first occurred */
	for (i=1; i<=ed_num; ++i)
	{
		for (j=i+1; j<=ed_num; ++j)
		{
			if (all_length_arr[i] != all_length_arr[j])
				break;
		}
		all_length_arr[++diff_len] = all_length_arr[i];
		flag_array[diff_len] = i;
		i = j-1;
	}

	/* index_array[i] currently holds the length of edge i; replace it by
	   whatever uniqueLenSearch returns for that length */
	for (i=1; i<=ed_num; ++i)
	{
		index_array[i] = uniqueLenSearch(all_length_arr, flag_array, diff_len, index_array[i]);
	}

	/* invert the mapping: flag_array[rank] = edge */
	for (i=1; i<=ed_num; ++i)
	{
		flag_array[index_array[i]] = i;
	}

	free((void*)all_length_arr);

	length_array = (unsigned int *) ckalloc (ed_num * sizeof (unsigned int));
	kmerSeq = (char *) ckalloc (overlaplen * sizeof (char));

	count = len_c = 0;
	for (i = 1; i <= ed_num; i++)
	{
		if ((ed_array[i].length + overlaplen) >= len_bar)
		{
			length_array[len_c++] = ed_array[i].length + overlaplen;
		}
		if (ed_array[i].length < 1 || ed_array[i].deleted)
		{
			continue;
		}
		count++;
		if (EdSmallerThanTwin (i))
		{
			i++;
		}
	}
	sum = 0;
	for (signI = len_c - 1; signI >= 0; signI--)
	{
		sum += length_array[signI];
	}

	qsort ( length_array, len_c, sizeof ( length_array[0] ), cmp_int );

	/* both lines need at least one entry (division by len_c, index len_c - 1) */
	if ( len_c > 0 )
	{
		printf ( "%d ctgs longer than %d, sum up %lldbp, with average length %lld\n", len_c, len_bar, sum, sum / len_c );
		printf ( "the longest is %ubp, ", length_array[len_c - 1] );
	}

	N50 = sum * 0.5;
	N90 = sum * 0.9;
	sum = flag = 0;
	/* accumulate from the longest contig downwards until the thresholds are hit */
	for (signI = len_c - 1; signI >= 0; signI--)
	{
		sum += length_array[signI];
		if (!flag && sum >= N50)
		{
			printf ("contig N50 is %u bp,", length_array[signI]);
			flag = 1;
		}
		if (sum >= N90)
		{
			printf ("contig N90 is %u bp\n", length_array[signI]);
			break;
		}
	}

	for (i = 1; i <= ed_num; i++)
	{
		j = flag_array[i];
		if (ed_array[j].deleted || ed_array[j].length < 1)
		{
			continue;
		}
		/* an edge is flagged as tip unless both it and its twin have arcs */
		if (ed_array[j].arcs && ed_array[getTwinEdge (j)].arcs)
		{
			tip = 0;
		}
		else
		{
			tip = 1;
		}
		output_1contig (i, &(ed_array[j]), fp, tip);
		if (EdSmallerThanTwin (j))
		{
			i++;
		}
	}

	fclose (fp);
	free ((void *) kmerSeq);
	free ((void *) length_array);
	printf ("%d contigs longer than %d output\n", count, cut_len);
	fp_contig = ckopen (indexName, "w");
	fprintf (fp_contig, "Edge_num %u %d\n", ed_num, count);
	fprintf (fp_contig, "index\tlength\treverseComplement\n");

	for (i = 1; i <= num_ed; i++)
	{
		j = flag_array[i];
		fprintf (fp_contig, "%u\t%d\t", i, edge_array[j].length + overlaplen);
		if (EdSmallerThanTwin (j))
		{
			fprintf (fp_contig, "1\n");
			i++;
		}
		else if (EdLargerThanTwin (j))
		{
			fprintf (fp_contig, "-1\n");
		}
		else
		{
			fprintf (fp_contig, "0\n");
		}
	}
	fclose (fp_contig);
}