示例#1
0
文件: main.cpp 项目: mourisl/Rascaf
/**
 * Program entry point.
 *
 * Parses the command line, builds exon and gene blocks from the alignments,
 * builds and cleans the gene block graph, scaffolds every component and
 * writes the command line plus the scaffolding result to "<prefix>.out"
 * (the prefix defaults to "rascaf" when -o is not given).
 *
 * Options handled here:
 *   -b FILE              bam file with the alignments (required)
 *   -bc FILE             clipped alignments; supplementary alignments are allowed
 *   -f FILE              assembly file (required when -cs is used)
 *   -o PREFIX            prefix of the output file
 *   -ms INT              minimumSupport          (default 2)
 *   -ml INT              minimumEffectiveLength  (default 200)
 *   -k INT               kmerSize                (default 23)
 *   -breakN INT          breakN                  (default 1)
 *   -minContigSize INT   minContigSize           (default 200)
 *   -v                   verbose
 *   -cs                  output connection sequence
 *
 * @param argc number of command line arguments
 * @param argv command line arguments
 * @return 0 on success. Prints the usage and exits with 0 when no argument
 *         is given; exits with a non-zero status on invalid usage or when
 *         a file cannot be opened or resolved.
 */
int main( int argc, char *argv[] )
{
	int i ;
	int ret ;

	Alignments alignments ;
	Alignments clippedAlignments ;
	Blocks blocks ;
	Genome genome ;
	char *genomeFile = NULL ;
	
	if ( argc < 2 )
	{
		printf( "%s", usage ) ;
		exit( 0 ) ;
	}

	// Default parameters.
	minimumSupport = 2 ;
	minimumEffectiveLength = 200 ;
	kmerSize = 23 ;
	breakN = 1 ;
	minContigSize = 200 ;
	prefix = NULL ;
	VERBOSE = false ;
	outputConnectionSequence = false ;
	aggressiveMode = false ;

	for ( i = 1 ; i < argc ; ++i )
	{
		const char *opt = argv[i] ;

		// These options consume the next argument as their value. Make sure it
		// exists before any branch below reads argv[i + 1]; otherwise a trailing
		// option would hand argv[argc] (a null pointer) to atoi() or Open().
		bool takesValue = !strcmp( "-b", opt ) || !strcmp( "-o", opt ) 
			|| !strcmp( "-f", opt ) || !strcmp( "-ms", opt ) 
			|| !strcmp( "-ml", opt ) || !strcmp( "-k", opt ) 
			|| !strcmp( "-breakN", opt ) || !strcmp( "-minContigSize", opt ) 
			|| !strcmp( "-bc", opt ) ;
		if ( takesValue && i + 1 >= argc )
		{
			fprintf( stderr, "Option %s requires an argument.\n", opt ) ;
			exit( EXIT_FAILURE ) ;
		}

		if ( !strcmp( "-b", argv[i] ) )
		{
			alignments.Open( argv[i + 1]) ;
			++i ;
		}
		else if ( !strcmp( "-o", argv[i] ) )
		{
			prefix = argv[i + 1] ;
			++i ;
		}
		else if ( !strcmp( "-f", argv[i] ) )
		{
			genomeFile = argv[i + 1] ;
			++i ;
		}
		else if ( !strcmp( "-ms", argv[i] ) )
		{
			minimumSupport = atoi( argv[i + 1] ) ;
			++i ;
		}
		else if ( !strcmp( "-ml", argv[i] ) )
		{
			minimumEffectiveLength = atoi( argv[i + 1] ) ;
			++i ;
		}
		else if ( !strcmp( "-k", argv[i] ) )
		{
			kmerSize = atoi( argv[i + 1] ) ;
			++i ;
		}
		else if ( !strcmp( "-breakN", argv[i] ) )
		{
			breakN = atoi( argv[i + 1] ) ;
			++i ;
		}
		else if ( !strcmp( "-minContigSize", argv[i] ) )
		{
			minContigSize = atoi( argv[i + 1] ) ;
			++i ;
		}
		else if ( !strcmp( "-v", argv[i] ) )
		{
			VERBOSE = true ;
		}
		else if ( !strcmp( "-cs", argv[i] ) )
		{
			outputConnectionSequence = true ;
		}
		// The aggressive mode option is currently disabled.
		/*else if ( !strcmp( "-aggressive", argv[i] ) )
		{
			aggressiveMode = true ;
		}*/
		else if ( !strcmp( "-bc", argv[i] ) )
		{
			// So far, assume the input is from BWA mem
			clippedAlignments.Open( argv[i + 1] ) ;
			clippedAlignments.SetAllowSupplementary( true ) ;
			++i ;
		}
		else
		{
			fprintf( stderr, "Unknown parameter: %s\n", argv[i] ) ;
			exit( 1 ) ;
		}
	}

	// Validate the option combination before creating any output or doing any work.
	if ( !alignments.IsOpened() )
	{
		fprintf( stderr, "Must use -b to specify the bam file.\n" ) ;
		return EXIT_FAILURE ;
	}

	if ( outputConnectionSequence == true && genomeFile == NULL )
	{
		fprintf( stderr, "Must use -f to specify assembly file when using -cs\n" ) ;	
		exit( EXIT_FAILURE ) ;
	}

	if ( prefix == NULL )
	{
		prefix = strdup( "rascaf" ) ;
		if ( prefix == NULL )
		{
			fprintf( stderr, "Out of memory.\n" ) ;
			exit( EXIT_FAILURE ) ;
		}
	}

	// Open "<prefix>.out". The name buffer is sized from the prefix, so an
	// arbitrarily long -o value cannot overflow it.
	{
		size_t outNameSize = strlen( prefix ) + sizeof( ".out" ) ; // sizeof counts the terminating '\0'
		char *outName = (char *)malloc( outNameSize ) ;
		if ( outName == NULL )
		{
			fprintf( stderr, "Out of memory.\n" ) ;
			exit( EXIT_FAILURE ) ;
		}
		snprintf( outName, outNameSize, "%s.out", prefix ) ;
		fpOut = fopen( outName, "w" ) ;
		if ( fpOut == NULL )
		{
			fprintf( stderr, "Failed to open output file %s.\n", outName ) ;
			exit( EXIT_FAILURE ) ;
		}
		free( outName ) ;
	}
	
	if ( genomeFile != NULL )
	{
		genome.Open( alignments, genomeFile ) ;
		alignments.Rewind() ;
	}

	// Build the graph
	ret = blocks.BuildExonBlocks( alignments, genome ) ;
	alignments.Rewind() ;
	fprintf( stderr, "Found %d exon blocks.\n", ret ) ;
	if ( clippedAlignments.IsOpened() )
	{
		fprintf( stderr, "Extend exon blocks with clipped alignments.\n" ) ;
		Blocks extendBlocks ;
		extendBlocks.BuildExonBlocks( clippedAlignments, genome ) ;
		clippedAlignments.Rewind() ;

		ret = blocks.ExtendExonBlocks( extendBlocks ) ;
		fprintf( stderr, "Found %d exon blocks after extension.\n", ret ) ;
	}

	blocks.GetAlignmentsInfo( alignments ) ;
	alignments.Rewind() ;

	ret = blocks.BuildGeneBlocks( alignments, genome ) ;
	alignments.Rewind() ;
	fprintf( stderr, "Found %d gene blocks.\n", ret ) ;
	
	blocks.BuildGeneBlockGraph( alignments ) ;
	if ( clippedAlignments.IsOpened() )
	{
		blocks.AddGeneBlockGraphByClippedAlignments( clippedAlignments ) ; 
	}
	
	// Cleaning
	blocks.CleanGeneBlockGraph( alignments, genome ) ;

	// Scaffolding
	Scaffold scaffold( blocks, genome ) ;
	int componentCnt = scaffold.BuildComponent() ;
	fprintf( stderr, "Found %d non-trivial gene block components.\n", componentCnt ) ;
	// Possible for parallelization
	for ( i = 0 ; i < componentCnt ; ++i )
	{
		scaffold.ScaffoldComponent( i ) ;
	}
	
	scaffold.ScaffoldGenome() ;
	
	// Output the command line. The values of -b and -f are written as
	// resolved absolute paths; everything else is echoed verbatim.
	fprintf( fpOut, "command line: " ) ;
	for ( i = 0 ; i < argc ; ++i )
	{
		char c = ( i == argc - 1 ) ? '\n' : ' ' ;
		bool isInputPath = i > 0 
			&& ( !strcmp( argv[i - 1], "-b" ) || !strcmp( argv[i - 1], "-f" ) ) ;

		if ( isInputPath )
		{
			// Passing NULL lets realpath() allocate a buffer of the required
			// size instead of relying on a fixed-size one.
			char *fullpath = realpath( argv[i], NULL ) ;
			if ( fullpath == NULL )
			{
				fprintf( stderr, "Failed to resolve the path of file %s.\n", argv[i] ) ;
				exit( 1 ) ;
			}
			fprintf( fpOut, "%s%c", fullpath, c ) ;
			free( fullpath ) ;
		}
		else
			fprintf( fpOut, "%s%c", argv[i], c ) ;
	}

	// NOTE(review): fpOut is not closed here because it is not visible whether
	// Scaffold::Output takes care of it; returning from main flushes open stdio streams.
	scaffold.Output( fpOut, alignments ) ;
	return 0 ;
}