Exemplo n.º 1
0
/// Reads a SNP list in .gff and puts the SNPs as IUPAC bases into the chromosome; superseded by incorporate_all_gff().
///
/// Every line of the file is split on tabs. A line is used only if it has at
/// least seven fields, field 0 equals this chromosome's name, field 2 is "SNP",
/// fields 3 and 4 are identical (single position), field 5 is "+", and the
/// 1-based position in field 3 lies inside the sequence. Field 6 is tokenized
/// again; each sub-token whose second-to-last character is ':' contributes its
/// last character as an allele. All alleles are merged with the current base
/// via MERGE_IUPAC, and the result is stored only if one of the alleles equals
/// the base currently in the sequence.
///
/// If the file cannot be opened, a message is written to stderr and the
/// sequence is left unchanged.
///
/// @param filename Path of the SNP file to read.
void TChromosome::incorporate_known_snps_gff ( string filename ) {
	if ( DEBUGGING ) { fprintf ( stdout , "Reading SNPS into %s from %s ... " , name.c_str() , filename.c_str() ) ; fflush(stdout); }

	FILE * file = fopen ( filename.c_str() , "r" ) ;
	if ( !file ) {
		fprintf ( stderr , "Could not open SNP file %s\n" , filename.c_str() ) ;
		return ;
	}

	int snpcnt = 0 ;
	char dummy[READ_CHAR_BUFFER] ;
	// fgets() returns NULL on end-of-file and on read errors, so the loop
	// terminates in both cases (testing feof() alone would spin on an error).
	while ( fgets ( dummy , READ_CHAR_BUFFER , file ) ) {
		vector <string> parts , parts2 ;
		Tokenize ( dummy , parts , "\t" ) ;
		if ( parts.size() < 7 ) continue ; // This is not the data you are looking for
		if ( parts[0] != name ) continue ; // Wrong chromosome
		if ( parts[2] != "SNP" ) continue ; // Not a SNP
		if ( parts[3] != parts[4] ) continue ; // Not a single point
		if ( parts[5] != "+" ) continue ; // Wrong strand (?)

		// Position is 1-based in the file; reject anything that would index
		// outside the sequence (atoi() yields 0 for non-numeric input).
		int pos = atoi ( parts[3].c_str() ) ;
		if ( pos < 1 || static_cast<string::size_type>(pos) > sequence.length() ) continue ;

		char merge = sequence[pos-1] ;
		Tokenize ( parts[6].c_str() , parts2 , "\"\" \"\"" ) ;
		bool found_reference = false ;
		for ( string::size_type b = 0 ; b < parts2.size() ; b++ ) {
			const string::size_type l = parts2[b].length() ;
			if ( l < 2 ) continue ;
			if ( parts2[b][l-2] != ':' ) continue ; // Only "...:X" tokens carry an allele
			char c = parts2[b][l-1] ;
			if ( c == sequence[pos-1] ) found_reference = true ;
			merge = MERGE_IUPAC(merge,c) ;
		}
		if ( !found_reference ) { // Strange; maybe original sequence contains "N"
			// We should count this!
			continue ;
		}
		sequence[pos-1] = merge ;
		snpcnt++ ;
	}
	fclose ( file ) ;
	if ( DEBUGGING ) { fprintf ( stdout , "incorporated %d SNPs.\n" , snpcnt ) ; fflush(stdout); }
}
Exemplo n.º 2
0
/// Reads a SNP list in simple format and puts the SNPs as IUPAC bases into the chromosome.
///
/// Every line of the file is split on tabs and must yield exactly four fields:
/// chromosome name, 1-based position, original base, SNP base. Lines for other
/// chromosomes, lines with an empty base field, and lines whose position lies
/// outside the sequence are skipped. For each accepted line the base at the
/// position is replaced by MERGE_IUPAC(original, snp), using the first
/// character of each base field.
///
/// If the file cannot be opened, a message is written to stderr and the
/// sequence is left unchanged.
///
/// @param filename Path of the SNP file to read.
void TChromosome::incorporate_known_snps ( string filename ) {
	if ( DEBUGGING ) { fprintf ( stdout , "Reading from %s ... " , filename.c_str() ) ; fflush(stdout); }

	FILE * file = fopen ( filename.c_str() , "r" ) ;
	if ( !file ) {
		fprintf ( stderr , "Could not open SNP file %s\n" , filename.c_str() ) ;
		return ;
	}

	char dummy[READ_CHAR_BUFFER] ;
	int snpcnt = 0 ;
	// fgets() returns NULL on end-of-file and on read errors, so the loop
	// terminates in both cases (testing feof() alone would spin on an error).
	while ( fgets ( dummy , READ_CHAR_BUFFER , file ) ) {
		vector <string> parts ;
		Tokenize ( dummy , parts , "\t" ) ;
		if ( parts.size() != 4 ) continue ;
		if ( name != parts[0] ) continue ;
		if ( parts[2].empty() || parts[3].empty() ) continue ; // No base to read

		// Position is 1-based in the file; reject anything that would index
		// outside the sequence (atoi() yields 0 for non-numeric input).
		int pos = atoi ( parts[1].c_str() ) ;
		if ( pos < 1 || static_cast<string::size_type>(pos) > sequence.length() ) continue ;

		uchar orig = parts[2][0] ;
		uchar snp = parts[3][0] ;
		sequence[pos-1] = MERGE_IUPAC(orig,snp) ;
		snpcnt++ ;
	}
	fclose ( file ) ;

	if ( DEBUGGING ) { fprintf ( stdout , "incorporated %d SNPs.\n" , snpcnt ) ; fflush(stdout); }
}
Exemplo n.º 3
0
void TAlignmentOutput::show_pileup ( FILE *pileup , bool snps_only ) {
	int search_snps = 0 , found_snps = 0 , bogus = 0 ;
	uint base , l , cnt , p , lastns ;
	uchar common ;
	char *s2 = new char[MAXLINES+5] ;
	char *s3 = new char[MAXLINES+5] ;
	char *t , *u ;
	
	uint count[256] ;

	for ( base = 0 ; base < chr->sequence.length() ; base++ ) {
		if ( snps_only && !isIUPAC[chr->sequence[base]] ) continue ;
		if ( isIUPAC[chr->sequence[base]] ) search_snps++ ;
		count['A']=count['C']=count['G']=count['T']=0 ;
		cnt = 0 ;
		common = ' ' ;
		lastns = 0 ;
		p = twotoone(0,base) ;
		t = s2 ;
		u = s3 ;
		char *c = align + p ;
		char *q = qalign + p ;
		for ( l = 0 ; l < MAXLINES ; l++ , c++ , q++ ) {
			if ( *c && *c != ' ' ) {
				lastns = l ;
				cnt++ ;
				count[*c]++ ;
				common = MERGE_IUPAC ( *c , common ) ;
				if ( *q < MINQUAL + 33 ) *t++ = to_lc[*c] ;
				else *t++ = *c ;
				*u++ = *q ;
			} else {
				*t++ = ' ' ;
				*u++ = ' ' ;
			}
		}
		s2[lastns+1] = 0 ;
		s3[lastns+1] = 0 ;
		
		bool confirmed_snp = false ;
		uint n = 0 ;
		if ( count['A'] >= MINOCCUR ) n++ ;
		if ( count['C'] >= MINOCCUR ) n++ ;
		if ( count['G'] >= MINOCCUR ) n++ ;
		if ( count['T'] >= MINOCCUR ) n++ ;
		if ( n > 1 ) {
			confirmed_snp = true ;
			found_snps++ ;
		}
		
/*		
		if ( common != ' ' && isIUPAC[common] ) {
			confirmed_snp = true ;
			found_snps++ ;
		}
*/

		if ( common != ' ' && isIUPAC[common] && !isIUPAC[chr->sequence[base]] ) bogus++ ;

		if ( snps_only && common == ' ' ) continue ;
		
		char mark = confirmed_snp ? '*' : ' ' ;
		
		string ref ;
		if ( !chr->original_sequence.empty() ) {
			ref += chr->original_sequence[base] ;
			ref += "\t" ;
		}
		ref += chr->sequence[base] ;

		fprintf ( pileup , "%s\t%s\t%d\t%c%c\t%d\t" , chr->name.c_str() , ref.c_str() , base+1 , common , mark , cnt ) ;
		fprintf ( pileup , "%s\t%s\n" , s2 , s3 ) ;
	}
}