Ejemplo n.º 1
0
int main (int argc, char *argv[]) {
    if( (argc== 1) ||
	(argc== 2 && string(argv[1]) == "-h") ||
	(argc== 2 && string(argv[1]) == "-help") ||
	(argc== 2 && string(argv[1]) == "--help") ){
	cout<<"Usage:"<<endl;
	cout<<""<<endl;
	cout<<"plotQualScore input.bam"<<endl;
	return 1;
    }

    string bamfiletopen = string(argv[1]);
    BamReader reader;

    if ( !reader.Open(bamfiletopen) ) {
    	cerr << "Could not open input BAM files." << endl;
    	return 1;
    }

    // if ( !reader.LocateIndex() ){
    // 	cerr << "warning: cannot locate index for file " << bamfiletopen<<endl;
    // 	//return 1;
    // }

    BamAlignment al;
    BamAlignment al2;
    
    bool unsurePEorSE=true;
    bool pe=true;
    int strLength=-1;
    int vecLengthToUse=-1;

    map<short,unsigned long>  ** counterA = 0;
    map<short,unsigned long>  ** counterC = 0;
    map<short,unsigned long>  ** counterG = 0;
    map<short,unsigned long>  ** counterT = 0;
    
    int lengthIndex1=0;
    int lengthIndex2=0;
    string seqInd1;
    string seqInd2;
    string qualInd1;
    string qualInd2;
    int offsetInd2;

    while ( reader.GetNextAlignment(al) ) {
	if(unsurePEorSE){
	    strLength=al.QueryBases.length();
	    if(al.IsPaired()){
		pe=true;
		vecLengthToUse=2*strLength;		
	    }else{
		pe=false;
		vecLengthToUse=strLength;
	    }
	    string index1;
	    string index2;
	
	    if(al.HasTag("XI")){
		al.GetTag("XI",index1);
		vecLengthToUse+=index1.length();
		lengthIndex1=index1.length();
	    }

	    if(al.HasTag("XJ")){
		al.GetTag("XJ",index2);
		vecLengthToUse+=index2.length();
		lengthIndex2=index2.length();
	    }

	    counterA     = new map<short,unsigned long>  * [vecLengthToUse];
	    counterC     = new map<short,unsigned long>  * [vecLengthToUse];
	    counterG     = new map<short,unsigned long>  * [vecLengthToUse];
	    counterT     = new map<short,unsigned long>  * [vecLengthToUse];
	    for(int i=0;i<vecLengthToUse;i++){
		counterA[i]=new map<short,unsigned long>  ();
		counterC[i]=new map<short,unsigned long>  ();
		counterG[i]=new map<short,unsigned long>  ();
		counterT[i]=new map<short,unsigned long>  ();
		for(short k=minQualScore;k<=maxQualScore;k++){
		    (*counterA[i])[k]=0;
		    (*counterC[i])[k]=0;
		    (*counterG[i])[k]=0;
		    (*counterT[i])[k]=0;
		}
	    }
	    unsurePEorSE=false;
	}else{
	    if(pe  &&
	       !al.IsPaired()){
		cerr << "Cannot have unpaired reads in PE mode" << endl;
		return 1;
	    }

	    if(!pe  &&
	       al.IsPaired()){
		cerr << "Cannot have unpaired reads in SE mode" << endl;
		return 1;
	    }
	}
	
	if(al.QueryBases.length() !=  al.Qualities.length()){
	    cerr << "Cannot have different lengths for sequence and quality" << endl;
	    return 1;
	}
	if(int(al.QueryBases.length()) !=  strLength){
	    cerr << "Cannot have different lengths for sequence and quality" << endl;
	    return 1;
	}

	if(pe){
	    if(al.IsFirstMate()){
		reader.GetNextAlignment(al2);
		if(al2.QueryBases.length() !=  al2.Qualities.length()){
		    cerr << "Cannot have different lengths for sequence and quality" << endl;
		    return 1;
		}

	    }else{
		cerr << "First read should be the first mate" << endl;
		return 1;	    
	    }
	}



	//cycle
	for(unsigned int i=0;i<al.QueryBases.length();i++){
	    short x=(short(al.Qualities[i])-qualOffset);
	    if(al.QueryBases[i] == 'A'){
	    	(*counterA[i])[x]++;
	    }
	    if(al.QueryBases[i] == 'C'){
	    	(*counterC[i])[x]++;
	    }
	    if(al.QueryBases[i] == 'G'){
	    	(*counterG[i])[x]++;
	    }
	    if(al.QueryBases[i] == 'T'){
	    	(*counterT[i])[x]++;
	    }
	}

	//The indices for al and al2 should hopefully be the same 
	if(lengthIndex1>0){
	    al.GetTag("XI",seqInd1);
	    al.GetTag("YI",qualInd1);
	    int j;

	    for(int i=0;i<lengthIndex1;i++){
		j=i+al.QueryBases.length();
		short x=(short(qualInd1[i])-qualOffset);
		if(seqInd1[i] == 'A'){
		    (*counterA[j])[x]++;
		}
		if(seqInd1[i] == 'C'){
		    (*counterC[j])[x]++;
		}
		if(seqInd1[i] == 'G'){
		    (*counterG[j])[x]++;
		}
		if(seqInd1[i] == 'T'){
		    (*counterT[j])[x]++;
		}
	    }
	}

	if(pe){
	    offsetInd2=al.QueryBases.length()+lengthIndex1+al2.QueryBases.length();
	    int j;
	    for(unsigned int i=0;i<al2.QueryBases.length();i++){
		j=i+al.QueryBases.length()+lengthIndex1;
		short x=(short(al2.Qualities[i])-qualOffset);
		if(al2.QueryBases[i] == 'A'){
		    (*counterA[j])[x]++;
		}
		if(al2.QueryBases[i] == 'C'){
		    (*counterC[j])[x]++;
		}
		if(al2.QueryBases[i] == 'G'){
		    (*counterG[j])[x]++;
		}
		if(al2.QueryBases[i] == 'T'){
		    (*counterT[j])[x]++;
		}
	    }
	}else{
	    offsetInd2=al.QueryBases.length()+lengthIndex1;
	}

	//The indices for al and al2 should hopefully be the same 
	if(lengthIndex2>0){
	    al.GetTag("XJ",seqInd2);
	    al.GetTag("YJ",qualInd2);
	    int j;

	    for(int i=0;i<lengthIndex2;i++){
		j=offsetInd2+i;
		short x=(short(qualInd2[i])-qualOffset);
		if(seqInd2[i] == 'A'){
		    (*counterA[j])[x]++;
		}
		if(seqInd2[i] == 'C'){
		    (*counterC[j])[x]++;
		}
		if(seqInd2[i] == 'G'){
		    (*counterG[j])[x]++;
		}
		if(seqInd2[i] == 'T'){
		    (*counterT[j])[x]++;
		}
	    }
	}

    }
    reader.Close();

    cout<<"cycle\t"<<"nuc\t";
    for(short k=minQualScore;k<maxQualScore;k++){
	cout<<k<<"\t";
    }
    cout<<maxQualScore<<endl;

    for(int i=0;i<vecLengthToUse;i++){
	cout<<(i+1)<<"\t";
	cout<<"A\t";
	for(short k=minQualScore;k<maxQualScore;k++){
	    cout<<(*counterA[i])[k]<<"\t";
	}
	cout<<(*counterA[i])[maxQualScore]<<endl;
	cout<<(i+1)<<"\t";
	cout<<"C\t";
	for(short k=minQualScore;k<maxQualScore;k++){
	    cout<<(*counterC[i])[k]<<"\t";
	}
	cout<<(*counterC[i])[maxQualScore]<<endl;
	cout<<(i+1)<<"\t";
	cout<<"G\t";
	for(short k=minQualScore;k<maxQualScore;k++){
	    cout<<(*counterG[i])[k]<<"\t";
	}
	cout<<(*counterG[i])[maxQualScore]<<endl;
	cout<<(i+1)<<"\t";
	cout<<"T\t";
	for(short k=minQualScore;k<maxQualScore;k++){
	    cout<<(*counterT[i])[k]<<"\t";
	}
	cout<<(*counterT[i])[maxQualScore]<<endl;	
    }



    
    return 0;
}
Ejemplo n.º 2
0
int main (int argc, char *argv[]) {

    string usage=string(""+string(argv[0])+"  [in BAM file]"+
			"\nThis program reads a BAM file and computes the error rate for each cycle\n"+
			// "\nreads and the puts the rest into another bam file.\n"+
			// "\nTip: if you do not need one of them, use /dev/null as your output\n"+
			// "arguments:\n"+
			// "\t"+"--bq  [base qual]   : Minimum base quality to flag a deaminated site (Default: "+stringify(minBaseQuality)+")\n"+
			"\n");

    if(argc == 1 ||
       (argc == 2 && (string(argv[0]) == "-h" || string(argv[0]) == "--help") )
       ){
	cerr << "Usage "<<usage<<endl;
	return 1;       
    }


    // for(int i=1;i<(argc-2);i++){ 

	
    //     if(string(argv[i]) == "--bq"){
    // 	    minBaseQuality=destringify<int>(argv[i+1]);
    //         i++;
    //         continue;
    // 	}

    // }

    string bamfiletopen = string( argv[ argc-1 ] );
    // string deambam      = string( argv[ argc-2 ] );
    // string nondeambam   = string( argv[ argc-1 ] );


    BamReader reader;
    
    if ( !reader.Open(bamfiletopen) ) {
    	cerr << "Could not open input BAM file"<< bamfiletopen << endl;
    	return 1;
    }








    //iterating over the alignments for these regions
    BamAlignment al;
    bool pairedEnd=false;
    bool firstRead=true;

    while ( reader.GetNextAlignment(al) ) {

	if(firstRead){ //reads are either all paired end or single end, I don't allow a mix
	    numberOfCycles=al.QueryBases.size();
	    // cout<<"numberOfCycles "<<numberOfCycles<<endl;
	    if(al.IsPaired() ){  
		pairedEnd=true;		
		matches    = vector<unsigned int> (2*numberOfCycles,0);
		mismatches = vector<unsigned int> (2*numberOfCycles,0);
		typesOfMismatches = vector< vector<unsigned int> >();
		for(int i=0;i<12;i++)
		    typesOfMismatches.push_back( vector<unsigned int> (2*numberOfCycles,0) );
	    }else{
		matches    = vector<unsigned int> (  numberOfCycles,0);
		mismatches = vector<unsigned int> (  numberOfCycles,0);
		typesOfMismatches = vector< vector<unsigned int> >();
		for(int i=0;i<12;i++)
		    typesOfMismatches.push_back( vector<unsigned int> (  numberOfCycles,0) );
	    }
	    firstRead=false;
	}

	if( (  pairedEnd && !al.IsPaired()) ||  
	    ( !pairedEnd &&  al.IsPaired())   ){
	    cerr<<"Read "<<al.Name<<" is wrong, cannot have a mixture of paired and unpaired read for this program"<<endl;
	    return 1;
	}


	//skip unmapped
	if(!al.IsMapped())
	    continue;

	if(numberOfCycles!=int(al.QueryBases.size())){
	    cerr<<"The length of read "<<al.Name<<" is wrong, should be "<<numberOfCycles<<"bp"<<endl;
	    return 1;
	}


	string reconstructedReference = reconstructRef(&al);
	if(al.Qualities.size() != reconstructedReference.size()){
	    cerr<<"Quality line is not the same size as the reconstructed reference"<<endl;
	    return 1;
	}


	if( pairedEnd ){
	    if( al.IsFirstMate() ){ //start cycle 0

		if( al.IsReverseStrand()  ){ 
		    increaseCounters(al,reconstructedReference,numberOfCycles-1,-1); //start cycle numberOfCycles-1
		}else{
		    increaseCounters(al,reconstructedReference,0               , 1); //start cycle 0
		}
       
	    }else{

		if( al.IsSecondMate()  ){ 
		    if( al.IsReverseStrand()  ){ 
			increaseCounters(al,reconstructedReference,2*numberOfCycles-1,-1); //start cycle 2*numberOfCycles-1
		    }else{
			increaseCounters(al,reconstructedReference,numberOfCycles    , 1); //start cycle numberOfCycles
		    }
		}else{
	    	    cerr<<"Reads "<<al.Name<<" must be either first or second mate"<<endl;
	    	    return 1;
	    	}
	    }	
	}else{ //single end 

	    if( al.IsReverseStrand()  ){ 
		increaseCounters(al,reconstructedReference,numberOfCycles-1,-1); //start cycle numberOfCycles-1
	    }else{
		increaseCounters(al,reconstructedReference,0               , 1); //start cycle 0
	    }
	    
	}


    }//end while  each read
	


    reader.Close();

    cout<<"cycle\tmatches\tmismatches\tmismatches%\tA>C\tA>C%\tA>G\tA>G%\tA>T\tA>T%\tC>A\tC>A%\tC>G\tC>G%\tC>T\tC>T%\tG>A\tG>A%\tG>C\tG>C%\tG>T\tG>T%\tT>A\tT>A%\tT>C\tT>C%\tT>G\tT>G%"<<endl;



    for(unsigned int i=0;i<matches.size();i++){
cout<<(i+1);
	if( (matches[i]+mismatches[i]!=0) )
	    cout<<"\t"<<matches[i]<<"\t"<<mismatches[i]<<"\t"<< 100.0*(double(mismatches[i])/double(matches[i]+mismatches[i])) ;
	else
	    cout<<"\t"<<matches[i]<<"\t"<<mismatches[i]<<"\tNA";

	for(int j=0;j<12;j++){   
	    cout<<"\t"<<typesOfMismatches[j][i];
	    if( (matches[i]+mismatches[i]!=0) )
		cout<<"\t"<<100.0*double(typesOfMismatches[j][i])/double(matches[i]+mismatches[i]);
	    else
		cout<<"\tNA";
	}

	cout<<endl;
    }




   
    return 0;
}
Ejemplo n.º 3
0
int main (int argc, char *argv[]) {

    bool mapped  =false;
    bool unmapped=false;

    const string usage=string(string(argv[0])+" [options] input.bam out.bam"+"\n\n"+
			      "This program takes a BAM file as input and produces\n"+
			      "another where the putative deaminated bases have\n"+
			      "have been cut\n"+
			      "\n"+
			      "Options:\n");
			      // "\t"+"-u , --unmapped" +"\n\t\t"+"For an unmapped bam file"+"\n"+
			      // "\t"+"-m , --mapped"   +"\n\t\t"+"For an mapped bam file"+"\n");
			      
			      

    if( (argc== 1) ||
	(argc== 2 && string(argv[1]) == "-h") ||
	(argc== 2 && string(argv[1]) == "-help") ||
	(argc== 2 && string(argv[1]) == "--help") ){
	cout<<"Usage:"<<endl;
	cout<<usage<<endl;
	cout<<""<<endl;
	return 1;
    }

    // for(int i=1;i<(argc-1);i++){ //all but the last arg

    // 	if(strcmp(argv[i],"-m") == 0 || strcmp(argv[i],"--mapped") == 0 ){
    // 	    mapped=true;
    // 	    continue;
    // 	}

    // 	if(strcmp(argv[i],"-u") == 0 || strcmp(argv[i],"--unmapped") == 0 ){
    // 	    unmapped=true;
    // 	    continue;
    // 	}
       
    // 	cerr<<"Unknown option "<<argv[i] <<" exiting"<<endl;
    // 	return 1;
    // }

    if(argc != 3){
	cerr<<"Error: Must specify the input and output BAM files";
	return 1;
    }

    string inbamFile =argv[argc-2];
    string outbamFile=argv[argc-1];

    // if(!mapped && !unmapped){
    // 	cerr << "Please specify whether you reads are mapped or unmapped" << endl;
    // 	return 1;
    // }
    // if(mapped && unmapped){
    // 	cerr << "Please specify either mapped or unmapped but not both" << endl;
    // 	return 1;
    // }

    BamReader reader;

    if ( !reader.Open(inbamFile) ) {
    	cerr << "Could not open input BAM files." << endl;
    	return 1;
    }


    vector<RefData>  testRefData=reader.GetReferenceData();
    const SamHeader header = reader.GetHeader();
    const RefVector references = reader.GetReferenceData();

    BamWriter writer;
    if ( !writer.Open(outbamFile, header, references) ) {
	cerr << "Could not open output BAM file" << endl;
	return 1;
    }

    BamAlignment al;
    // BamAlignment al2;
    // bool al2Null=true;
    
    while ( reader.GetNextAlignment(al) ) {

	    if(al.IsPaired() ){  

		if(al.IsFirstMate() ){ //5' end, need to check first base only
		    if(al.IsReverseStrand()){ //
			if(!al.IsMapped()){
			    cerr << "Cannot have reverse complemented unmapped reads: " <<al.Name<< endl;
			    //return 1;
			}
			int indexToCheck;
			//last
			indexToCheck=al.QueryBases.length()-1;
			if(toupper(al.QueryBases[indexToCheck]) == 'A'){
			    //al.Qualities[indexToCheck]=char(offset+baseQualForDeam);			 
			    al.QueryBases = al.QueryBases.substr(0,indexToCheck);
			    al.Qualities  = al.Qualities.substr(0, indexToCheck);
			}
		    }else{
			int indexToCheck;
			//first base
			indexToCheck=0;
			if(toupper(al.QueryBases[indexToCheck]) == 'T'){
			    //al.Qualities[indexToCheck]=char(offset+baseQualForDeam);
			    al.QueryBases = al.QueryBases.substr(indexToCheck+1);
			    al.Qualities  = al.Qualities.substr(indexToCheck+1);
			}
		    }
		}else{ //3' end, need to check last two bases only
		    if( al.IsSecondMate() ){
			if(al.IsReverseStrand()){ //
			    if(!al.IsMapped()){
				cerr << "Cannot have reverse complemented unmapped reads: " <<al.Name<< endl;
				//return 1;
			    }
			    int indexToCheck;

			    //second to last
			    indexToCheck=al.QueryBases.length()-2;
			    if(toupper(al.QueryBases[indexToCheck]) == 'T'){
				//al.Qualities[indexToCheck]=char(offset+baseQualForDeam);
				al.QueryBases = al.QueryBases.substr(0,indexToCheck);
				al.Qualities  = al.Qualities.substr(0, indexToCheck);
			    }else{
				//last
				indexToCheck=al.QueryBases.length()-1;
				if(toupper(al.QueryBases[indexToCheck]) == 'T'){
				    //al.Qualities[indexToCheck]=char(offset+baseQualForDeam);
				    al.QueryBases = al.QueryBases.substr(0,indexToCheck);
				    al.Qualities  = al.Qualities.substr(0, indexToCheck);
				}
			    }
			}else{
			    int indexToCheck;
			    //second base
			    indexToCheck=1;
			    if(toupper(al.QueryBases[indexToCheck]) == 'A'){
				//al.Qualities[indexToCheck]=char(offset+baseQualForDeam);
				al.QueryBases = al.QueryBases.substr(indexToCheck+1);
				al.Qualities  = al.Qualities.substr(indexToCheck+1);
			    }else{
				//first base
				indexToCheck=0;
				if(toupper(al.QueryBases[indexToCheck]) == 'A'){
				    //al.Qualities[indexToCheck]=char(offset+baseQualForDeam);
				    al.QueryBases = al.QueryBases.substr(indexToCheck+1);
				    al.Qualities  = al.Qualities.substr(indexToCheck+1);
				}
			    }

			}
		    }else{
			cerr << "Wrong state" << endl;
			return 1;
		    }
		}

	    }//end of paired end
	    else{//we consider single reads to have been sequenced from 5' to 3'

		if(al.IsReverseStrand()){ //need to consider 
		    if(!al.IsMapped()){
			cerr << "Cannot have reverse complemented unmapped reads: " <<al.Name<< endl;
			//return 1;
		    }

		    int indexToCheck;



		    //second base
		    indexToCheck=1;
		    if(toupper(al.QueryBases[indexToCheck]) == 'A'){
			// al.Qualities[indexToCheck]=char(offset+baseQualForDeam);
			// cout<<"51 "<<al.QueryBases<<endl;
			// cout<<"51 "<<al.Qualities<<endl;
			al.QueryBases = al.QueryBases.substr(indexToCheck+1);
			al.Qualities  = al.Qualities.substr(indexToCheck+1);
			// cout<<"52 "<<al.QueryBases<<endl;
			// cout<<"52 "<<al.Qualities<<endl;
		    }else{
			//first base
			indexToCheck=0;
			if(toupper(al.QueryBases[indexToCheck]) == 'A'){
			    // al.Qualities[indexToCheck]=char(offset+baseQualForDeam);
			    // cout<<"61 "<<al.QueryBases<<endl;
			    // cout<<"61 "<<al.Qualities<<endl;
			    al.QueryBases = al.QueryBases.substr(indexToCheck+1);
			    al.Qualities  = al.Qualities.substr(indexToCheck+1);
			    // cout<<"62 "<<al.QueryBases<<endl;
			    // cout<<"62 "<<al.Qualities<<endl;
			}

		    }


		    //last
		    indexToCheck=al.QueryBases.length()-1;
		    if(toupper(al.QueryBases[indexToCheck]) == 'A'){
			// al.Qualities[indexToCheck]=char(offset+baseQualForDeam);
			// cout<<"21 "<<al.QueryBases<<endl;
			// cout<<"21 "<<al.Qualities<<endl;
			al.QueryBases = al.QueryBases.substr(0,indexToCheck);
			al.Qualities  = al.Qualities.substr(0, indexToCheck);
			// cout<<"22 "<<al.QueryBases<<endl;
			// cout<<"22 "<<al.Qualities<<endl;
		    }
		}else{

		    int indexToCheck;
		    //first base
		    indexToCheck=0;
		    if(toupper(al.QueryBases[indexToCheck]) == 'T'){
			// al.Qualities[indexToCheck]=char(offset+baseQualForDeam);

			// cout<<"11 "<<al.QueryBases<<endl;
			// cout<<"11 "<<al.Qualities<<endl;
			al.QueryBases = al.QueryBases.substr(indexToCheck+1);
			al.Qualities  = al.Qualities.substr(indexToCheck+1);
			// cout<<"12 "<<al.QueryBases<<endl;
			// cout<<"12 "<<al.Qualities<<endl;

		    }

		    //second to last
		    indexToCheck=al.QueryBases.length()-2;
		    if(toupper(al.QueryBases[indexToCheck]) == 'T'){
			// al.Qualities[indexToCheck]=char(offset+baseQualForDeam);
			// cout<<"31 "<<al.QueryBases<<endl;
			// cout<<"31 "<<al.Qualities<<endl;
			al.QueryBases = al.QueryBases.substr(0,indexToCheck);
			al.Qualities  = al.Qualities.substr(0, indexToCheck);
			// cout<<"32 "<<al.QueryBases<<endl;
			// cout<<"32 "<<al.Qualities<<endl;
		    }else{

			//last
			indexToCheck=al.QueryBases.length()-1;
			if(toupper(al.QueryBases[indexToCheck]) == 'T'){
			    // al.Qualities[indexToCheck]=char(offset+baseQualForDeam);
			    // cout<<"41 "<<al.QueryBases<<endl;
			    // cout<<"41 "<<al.Qualities<<endl;
			    al.QueryBases = al.QueryBases.substr(0,indexToCheck);
			    al.Qualities  = al.Qualities.substr(0, indexToCheck);
			    // cout<<"42 "<<al.QueryBases<<endl;
			    // cout<<"42 "<<al.Qualities<<endl;
			}

		    }
		}
	    }//end of single end

	    writer.SaveAlignment(al);		


    }//    while ( reader.GetNextAlignment(al) ) {

    reader.Close();
    writer.Close();
   
    return 0;
}
Ejemplo n.º 4
0
	virtual void main()
	{
		//init
		QTextStream out(stdout);
		BamReader reader;
		NGSHelper::openBAM(reader, getInfile("in"));

		FastqOutfileStream out1(getOutfile("out1"), false);
		FastqOutfileStream out2(getOutfile("out2"), false);

		long long c_unpaired = 0;
		long long c_paired = 0;
		int max_cached = 0;

		//iterate through reads
		BamAlignment al;
		QHash<QByteArray, BamAlignment> al_cache;
		while (reader.GetNextAlignment(al))
		{
			//skip secondary alinments
			if(!al.IsPrimaryAlignment()) continue;

			//skip unpaired
			if(!al.IsPaired())
			{
				++c_unpaired;
				continue;
			}

			QByteArray name(al.Name.data()); //TODO use QByteArray::fromStdString (when upgraded to Qt5.4)

			//store cached read when we encounter the mate
			if (al_cache.contains(name))
			{
				BamAlignment mate = al_cache.take(name);
				//out << name << " [AL] First: " << al.IsFirstMate() << " Reverse: " << al.IsReverseStrand() << " Seq: " << al.QueryBases.data() << endl;
				//out << name << " [MA] First: " << mate.IsFirstMate() << " Reverse: " << mate.IsReverseStrand() << " Seq: " << mate.QueryBases.data() << endl;
				if (al.IsFirstMate())
				{
					write(out1, al, al.IsReverseStrand());
					write(out2, mate, mate.IsReverseStrand());
				}
				else
				{
					write(out1, mate, mate.IsReverseStrand());
					write(out2, al, al.IsReverseStrand());
				}
				++c_paired;
			}
			//cache read for later retrieval
			else
			{
				al_cache.insert(name, al);
			}

			max_cached = std::max(max_cached, al_cache.size());
		}
		reader.Close();
		out1.close();
		out2.close();

		//write debug output
		out << "Pair reads (written)            : " << c_paired << endl;
		out << "Unpaired reads (skipped)        : " << c_unpaired << endl;
		out << "Unmatched paired reads (skipped): " << al_cache.size() << endl;
		out << endl;
		out << "Maximum cached reads            : " << max_cached << endl;
	}
Ejemplo n.º 5
0
void BedWindow::WindowIntersectBam(string bamFile) {

    // load the "B" bed file into a map so
    // that we can easily compare "A" to it for overlaps
    _bedB->loadBedFileIntoMap();

    // open the BAM file
    BamReader reader;
    BamWriter writer;
    reader.Open(bamFile);

    // get header & reference information
    string bamHeader  = reader.GetHeaderText();
    RefVector refs    = reader.GetReferenceData();

    // open a BAM output to stdout if we are writing BAM
    if (_bamOutput == true) {
        // set compression mode
        BamWriter::CompressionMode compressionMode = BamWriter::Compressed;
        if ( _isUncompressedBam ) compressionMode = BamWriter::Uncompressed;
        writer.SetCompressionMode(compressionMode);
        // open our BAM writer
        writer.Open("stdout", bamHeader, refs);
    }

    vector<BED> hits;                   // vector of potential hits
    // reserve some space
    hits.reserve(100);

    _bedA->bedType = 6;
    BamAlignment bam;
    bool overlapsFound;
    // get each set of alignments for each pair.
    while (reader.GetNextAlignment(bam)) {

        if (bam.IsMapped()) {
            BED a;
            a.chrom = refs.at(bam.RefID).RefName;
            a.start = bam.Position;
            a.end   = bam.GetEndPosition(false, false);

            // build the name field from the BAM alignment.
            a.name = bam.Name;
            if (bam.IsFirstMate()) a.name += "/1";
            if (bam.IsSecondMate()) a.name += "/2";

            a.score  = ToString(bam.MapQuality);
            a.strand = "+"; if (bam.IsReverseStrand()) a.strand = "-";

            if (_bamOutput == true) {
                overlapsFound = FindOneOrMoreWindowOverlaps(a);
                if (overlapsFound == true) {
                    if (_noHit == false)
                        writer.SaveAlignment(bam);
                }
                else {
                    if (_noHit == true)
                        writer.SaveAlignment(bam);
                }
            }
            else {
                FindWindowOverlaps(a, hits);
                hits.clear();
            }
        }
        // BAM IsMapped() is false
        else if (_noHit == true) {
            writer.SaveAlignment(bam);
        }
    }

    // close the relevant BAM files.
    reader.Close();
    if (_bamOutput == true) {
        writer.Close();
    }
}
Ejemplo n.º 6
0
void BedGenomeCoverage::CoverageBam(string bamFile) {

    ResetChromCoverage();

    // open the BAM file
    BamReader reader;
    if (!reader.Open(bamFile)) {
        cerr << "Failed to open BAM file " << bamFile << endl;
        exit(1);
    }

    // get header & reference information
    string header = reader.GetHeaderText();
    RefVector refs = reader.GetReferenceData();

    // load the BAM header references into a BEDTools "genome file"
    _genome = new GenomeFile(refs);
    // convert each aligned BAM entry to BED
    // and compute coverage on B
    BamAlignment bam;
    while (reader.GetNextAlignment(bam)) {
        // skip if the read is unaligned
        if (bam.IsMapped() == false)
            continue;

        bool _isReverseStrand = bam.IsReverseStrand();

        //changing second mate's strand to opposite
        if( _dUTP && bam.IsPaired() && bam.IsMateMapped() && bam.IsSecondMate())
            _isReverseStrand = !bam.IsReverseStrand();

        // skip if we care about strands and the strand isn't what
        // the user wanted
        if ( (_filterByStrand == true) &&
             ((_requestedStrand == "-") != _isReverseStrand) )
            continue;

        // extract the chrom, start and end from the BAM alignment
        string chrom(refs.at(bam.RefID).RefName);
        CHRPOS start = bam.Position;
        CHRPOS end = bam.GetEndPosition(false, false) - 1;

        // are we on a new chromosome?
        if ( chrom != _currChromName )
            StartNewChrom(chrom);
        if(_pair_chip_) {
            // Skip if not a proper pair
            if (bam.IsPaired() && (!bam.IsProperPair() or !bam.IsMateMapped()) )
                continue;
            // Skip if wrong coordinates
            if( ( (bam.Position<bam.MatePosition) && bam.IsReverseStrand() ) ||
                ( (bam.MatePosition < bam.Position) && bam.IsMateReverseStrand() ) ) {
                    //chemically designed: left on positive strand, right on reverse one
                    continue;
            }

            /*if(_haveSize) {
                if (bam.IsFirstMate() && bam.IsReverseStrand()) { //put fragmentSize in to the middle of pair end_fragment
                    int mid = bam.MatePosition+abs(bam.InsertSize)/2;
                    if(mid<_fragmentSize/2)
                        AddCoverage(0, mid+_fragmentSize/2);
                    else
                        AddCoverage(mid-_fragmentSize/2, mid+_fragmentSize/2);
                }
                else if (bam.IsFirstMate() && bam.IsMateReverseStrand()) { //put fragmentSize in to the middle of pair end_fragment
                    int mid = start+abs(bam.InsertSize)/2;
                    if(mid<_fragmentSize/2)
                        AddCoverage(0, mid+_fragmentSize/2);
                    else
                        AddCoverage(mid-_fragmentSize/2, mid+_fragmentSize/2);
                }
            } else */

            if (bam.IsFirstMate() && bam.IsReverseStrand()) { //prolong to the mate to the left
                AddCoverage(bam.MatePosition, end);
            }
            else if (bam.IsFirstMate() && bam.IsMateReverseStrand()) { //prolong to the mate to the right
                AddCoverage(start, start + abs(bam.InsertSize) - 1);
            }
        } else if (_haveSize) {
            if(bam.IsReverseStrand()) {
                if(end<_fragmentSize) { //sometimes fragmentSize is bigger :(
                    AddCoverage(0, end);
                } else {
                    AddCoverage(end + 1 - _fragmentSize, end );
                }
            } else {
                AddCoverage(start,start+_fragmentSize - 1);
            }
        } else
        // add coverage accordingly.
        if (!_only_5p_end && !_only_3p_end) {
            bedVector bedBlocks;
            // we always want to split blocks when a D CIGAR op is found.
            // if the user invokes -split, we want to also split on N ops.
            if (_obeySplits) { // "D" true, "N" true
                GetBamBlocks(bam, refs.at(bam.RefID).RefName, bedBlocks, true, true);
            }
            else { // "D" true, "N" false
                GetBamBlocks(bam, refs.at(bam.RefID).RefName, bedBlocks, true, false);
            }
            AddBlockedCoverage(bedBlocks);
        }
        else if (_only_5p_end) {
            CHRPOS pos = ( !bam.IsReverseStrand() ) ? start : end;
            AddCoverage(pos,pos);
        }
        else if (_only_3p_end) {
            CHRPOS pos = ( bam.IsReverseStrand() ) ? start : end;
            AddCoverage(pos,pos);
        }
    }
    // close the BAM
    reader.Close();

    // process the results of the last chromosome.
    ReportChromCoverage(_currChromCoverage, _currChromSize,
            _currChromName, _currChromDepthHist);

    // report all empty chromsomes
    PrintEmptyChromosomes();

    // report the overall coverage if asked.
    PrintFinalCoverage();
}
Ejemplo n.º 7
0
void BedIntersect::IntersectBam(string bamFile) {

	// load the "B" bed file into a map so
	// that we can easily compare "A" to it for overlaps
	_bedB->loadBedFileIntoMap();
	
	// open the BAM file
	BamReader reader;
	BamWriter writer;
	reader.Open(bamFile);

	// get header & reference information
	string header  = reader.GetHeaderText();
	RefVector refs = reader.GetReferenceData();

	// open a BAM output to stdout if we are writing BAM
	if (_bamOutput == true) {
		// open our BAM writer
        writer.Open("stdout", header, refs, _isUncompressedBam);
	}

	vector<BED> hits;
	// reserve some space
	hits.reserve(100);
	
	_bedA->bedType = 6;
	BamAlignment bam;	
	// get each set of alignments for each pair.
	while (reader.GetNextAlignment(bam)) {
		
		if (bam.IsMapped()) {	
			BED a;
			a.chrom = refs.at(bam.RefID).RefName;
			a.start = bam.Position;
			a.end   = bam.GetEndPosition(false);

			// build the name field from the BAM alignment.
			a.name = bam.Name;
			if (bam.IsFirstMate()) a.name += "/1";
			if (bam.IsSecondMate()) a.name += "/2";

			a.score  = ToString(bam.MapQuality);
			
			a.strand = "+"; 
			if (bam.IsReverseStrand()) a.strand = "-"; 
	
			if (_bamOutput == true) {
			    bool overlapsFound = false;
			    // treat the BAM alignment as a single "block"
			    if (_obeySplits == false) {
				    overlapsFound = FindOneOrMoreOverlap(a);
				}
				// split the BAM alignment into discrete blocks and
				// look for overlaps only within each block.
				else {
                    bool overlapFoundForBlock;
				    bedVector bedBlocks;  // vec to store the discrete BED "blocks" from a
				    // we don't want to split on "D" ops, hence the "false"
                    getBamBlocks(bam, refs, bedBlocks, false);
                    
                    vector<BED>::const_iterator bedItr  = bedBlocks.begin();
                	vector<BED>::const_iterator bedEnd  = bedBlocks.end();
                	for (; bedItr != bedEnd; ++bedItr) {
            	        overlapFoundForBlock = FindOneOrMoreOverlap(a);
            	        if (overlapFoundForBlock == true)
                            overlapsFound = true;
            	    }
				}
				if (overlapsFound == true) {
					if (_noHit == false)
						writer.SaveAlignment(bam);
				}
				else {
					if (_noHit == true) {
						writer.SaveAlignment(bam);
					}	
				}
			}
			else {
			    // treat the BAM alignment as a single BED "block"
			    if (_obeySplits == false) {
				    FindOverlaps(a, hits);
				    hits.clear();
			    }
			    // split the BAM alignment into discrete BED blocks and
				// look for overlaps only within each block.
			    else {
			        bedVector bedBlocks;  // vec to store the discrete BED "blocks" from a
                    getBamBlocks(bam, refs, bedBlocks, false);

                    vector<BED>::const_iterator bedItr  = bedBlocks.begin();
                	vector<BED>::const_iterator bedEnd  = bedBlocks.end();
                	for (; bedItr != bedEnd; ++bedItr) {
            	        FindOverlaps(*bedItr, hits);
                        hits.clear();
            	    }
			    }
			}
		}
	}
	
	// close the relevant BAM files.
	reader.Close();
	if (_bamOutput == true) {
		writer.Close();
	}
}
Ejemplo n.º 8
0
/*
Input file format: BAM file
*/
void Distinguish( string mappingFile, map< string, bool > & hapSNP, map< string, string > & refSeq, string outfilePrefix ) {
// [WARNING] If refSeq is not empty than it's BS data, otherwirse is resequencing data.
	string outfile1 = outfilePrefix + ".1.bam";
	string outfile2 = outfilePrefix + ".2.bam";
	string outHomoF = outfilePrefix + ".h**o.bam";
	string outUdeci = outfilePrefix + ".ambiguity.bam"; // The ambiguity reads or the reads which has't any SNP.

	BamReader h_I; // bam input file handle
	if ( !h_I.Open( mappingFile ) ) cerr << "[ERROR]: " << h_I.GetErrorString() << endl;

	// "header" and "references" from BAM files, these are required by BamWriter
	const SamHeader header     = h_I.GetHeader();
	const RefVector references = h_I.GetReferenceData();
	
	BamWriter h_O1, h_O2, h_U, h_H;
	if ( !h_O1.Open( outfile1, header, references ) ) { cerr << "Cannot open output BAM file: " << outfile1 << endl; exit(1); }
	if ( !h_O2.Open( outfile2, header, references ) ) { cerr << "Cannot open output BAM file: " << outfile2 << endl; exit(1); }
	if ( !h_U.Open ( outUdeci, header, references ) ) { cerr << "Cannot open output BAM file: " << outUdeci << endl; exit(1); }
	if ( !h_H.Open ( outHomoF, header, references ) ) { cerr << "Cannot open output BAM file: " << outHomoF << endl; exit(1); }

	int readsNumberRecord(0);

	SamLine samline;                                       // Samline class
	SamExt      sam;
	BamAlignment al;
	map< string, pair<BamAlignment, SamExt> > firstMateAl; // record the first mate reads alignment, HIstory problem to be like this struct!!

	string refstr; 					                       // Just For BS data.
	bool isC2T ( false );			                       // Just For BS data.
	while ( h_I.GetNextAlignment( al ) ) {

		++readsNumberRecord;
		if ( readsNumberRecord % 1000000 == 0 ) 
			cerr << "Have been dealed " << readsNumberRecord << " lines. " << local_time ();

		if ( !al.IsMapped() ) continue;
        //if ( al.InsertSize == 0 || al.RefID != al.MateRefID ) continue;

		samline._RID      = al.Name; samline._Flag= al.AlignmentFlag;
        samline._ref_id   = h_I.GetReferenceData()[al.RefID].RefName;
        samline._position = al.Position + 1;     // Position (0-base starts in BamTools), but I need 1-base starts
        samline._mapQ     = al.MapQuality;
		// MateRefID == -1 means mate read is unmapping
        samline._XorD     = ( al.MateRefID > -1 ) ? h_I.GetReferenceData()[al.MateRefID].RefName : "*"; 
        samline._coor     = al.MatePosition + 1; // Position (0-base starts in BamTools), but I need 1-base starts
        samline._seq      = al.QueryBases;
        samline._insert_size = abs (al.InsertSize);
		if ( samline._ref_id.compare( "BIG_ID_CAT" ) == 0 ) continue; // Ignore "BIG_ID_CAT"

		// get cigar;
        samline._cigar = itoa(al.CigarData[0].Length); samline._cigar.append( 1, al.CigarData[0].Type );
		for ( size_t i(1); i < al.CigarData.size(); ++i ) {
            samline._cigar += itoa(al.CigarData[i].Length);
            samline._cigar.append( 1, al.CigarData[i].Type );
        }

        sam.assign( &samline );
		/*********************************** For BS Data *********************************************/
		if ( !refSeq.empty() ) { // If the data is BS data, we should modify the QueryBases.
			if ( !refSeq.count( samline._ref_id ) ) { 
				cerr << "[ERROR]There's no such reference in the reference file. " << samline._ref_id << endl;
				exit(1);
			}
			if ( al.IsFirstMate() && !al.IsReverseStrand() ) { 
				isC2T = true; 
			} 
			else if ( al.IsFirstMate() && al.IsReverseStrand() ) { 
				isC2T = false; 
			} 
			else if ( al.IsSecondMate() && !al.IsReverseStrand() ) {
				isC2T = false;
			}
			else if ( al.IsSecondMate() && al.IsReverseStrand() ) {
				isC2T = true;
			} else {
				cerr << "[ERROR MATCH] " << endl; exit(1);
			}
			refstr.assign( refSeq[samline._ref_id], sam.ref_start() - 1, sam.ref_end() - sam.ref_start() + 1 );
			modifyBSreadBases( samline._ref_id, sam.ref_start (), sam.read_start(), sam.cigar_seq(), refstr, sam._seq, hapSNP, isC2T );
		}
		/********************************** End For BS Data *******************************************/

		// Consider the mate pair reads
		if ( !firstMateAl.count(al.Name) && (al.MateRefID > -1) ) { 

			firstMateAl[al.Name] = std::make_pair( al, sam );
		} else { // Consider the mate pair reads

			if ( !firstMateAl.count(al.Name) ) {

				switch ( Decide( sam, hapSNP ) ) { 
					case 1 : h_O1.SaveAlignment( al ); break; // Hap1
					case 2 : h_O2.SaveAlignment( al ); break; // Hap2
					case 0 : h_U.SaveAlignment ( al ); break; // Ambiguity
					default: // This alignment didn't contain any hete SNP.
							 h_H.SaveAlignment ( al );        // Homozygous reads
				}
			} else {
				
				int mark1 = Decide( firstMateAl[al.Name].second, hapSNP );
				int mark2 = Decide( sam, hapSNP );

				if ( mark1 == 1 && mark2 == 1 ) {
					
					h_O1.SaveAlignment( firstMateAl[al.Name].first );
					h_O1.SaveAlignment( al );
				} else if ( (mark1 == 1 && mark2 ==  0) || (mark1 == 0  && mark2 == 1) ) {
					
					h_O1.SaveAlignment( firstMateAl[al.Name].first );
					h_O1.SaveAlignment( al );
				} else if ( (mark1 == 1 && mark2 == -1) || (mark1 == -1 && mark2 == 1) ) {

					h_O1.SaveAlignment( firstMateAl[al.Name].first );
					h_O1.SaveAlignment( al );
				} else if ( mark1 == 2 && mark2 == 2 ) {

					h_O2.SaveAlignment( firstMateAl[al.Name].first );
					h_O2.SaveAlignment( al );
				} else if ( (mark1 == 2 && mark2 == 0 ) || (mark1 == 0  && mark2 == 2) ) {
					
					h_O2.SaveAlignment( firstMateAl[al.Name].first );
					h_O2.SaveAlignment( al );
				} else if ( (mark1 == 2 && mark2 == -1) || (mark1 == -1 && mark2 == 2) ) {

					h_O2.SaveAlignment( firstMateAl[al.Name].first );
					h_O2.SaveAlignment( al );
				} else if ( mark1 == -1 && mark2 == -1 ) {

					h_H.SaveAlignment ( firstMateAl[al.Name].first );
					h_H.SaveAlignment ( al );
				} else {
					
					h_U.SaveAlignment ( firstMateAl[al.Name].first );
					h_U.SaveAlignment ( al );
				}

				firstMateAl.erase( al.Name );
			}
		}
	}

	cerr << "------------  Remaind size: " << firstMateAl.size() << endl;
	for (map< string, pair<BamAlignment, SamExt> >::iterator it( firstMateAl.begin() ); it != firstMateAl.end(); ++it ) {

		switch ( Decide( it->second.second, hapSNP ) ) {
			case 1 : h_O1.SaveAlignment( it->second.first ); break; // Hap1
			case 2 : h_O2.SaveAlignment( it->second.first ); break; // Hap2
			case 0 : h_U.SaveAlignment ( it->second.first ); break; // Ambiguity
			default: // This alignment didn't contain any hete SNP.
					 h_H.SaveAlignment ( it->second.first );        // Homozygous reads
		}
	}

	h_I.Close();
	h_U.Close();
	h_H.Close();
	h_O1.Close();
	h_O2.Close();

	cerr << ">>>>>>>>>>>>> All Done <<<<<<<<<<<<<<" << endl;
	cerr << "Write to output file: " << outfile1    << endl;
	cerr << "Write to output file: " << outfile2    << endl;
	cerr << "Write to output file: " << outHomoF    << endl;
	cerr << "Write to output file: " << outUdeci    << endl;

	return;
}
Ejemplo n.º 9
0
int main (int argc, char *argv[]) {

    // bool mapped  =false;
    // bool unmapped=false;
    int bpToDecrease5=1;
    int bpToDecrease3=2;

    const string usage=string(string(argv[0])+" [options] input.bam out.bam"+"\n\n"+
			      "\tThis program takes a BAM file as input and produces\n"+
			      "\tanother where the putative deaminated bases have\n"+
			      "\ta base quality score of "+intStringify(baseQualForDeam)+"\n"+
			      "\tgiven an "+intStringify(offset)+" offset \n"+
			      "\n"+
			      "\tOptions:\n"+
			      "\t\t"+"-n5" +"\t\t\t"+"Decrease the nth bases surrounding the 5' ends (Default:"+stringify(bpToDecrease5)+") "+"\n"+
			      "\t\t"+"-n3" +"\t\t\t"+"Decrease the nth bases surrounding the 3' ends (Default:"+stringify(bpToDecrease3)+") "+"\n"
			      );
			      // "\t"+"-u , --unmapped" +"\n\t\t"+"For an unmapped bam file"+"\n"+
			      // "\t"+"-m , --mapped"   +"\n\t\t"+"For an mapped bam file"+"\n");
			      
			      

    if( (argc== 1) ||
	(argc== 2 && string(argv[1]) == "-h") ||
	(argc== 2 && string(argv[1]) == "-help") ||
	(argc== 2 && string(argv[1]) == "--help") ){
	cout<<"Usage:"<<endl;
	cout<<usage<<endl;
	cout<<""<<endl;
	return 1;
    }

    for(int i=1;i<(argc-2);i++){ //all but the last arg

    	if( string(argv[i]) == "-n5" ){
	    bpToDecrease5 = destringify<int>(argv[i+1]);
            i++;
    	    continue;
    	}

    	if( string(argv[i]) == "-n3" ){
	    bpToDecrease3 = destringify<int>(argv[i+1]);
            i++;
    	    continue;
    	}
       
    	cerr<<"Unknown option "<<argv[i] <<" exiting"<<endl;
    	return 1;
    }

    if(argc < 3){
	cerr<<"Error: Must specify the input and output BAM files";
	return 1;
    }

    string inbamFile =argv[argc-2];
    string outbamFile=argv[argc-1];

    if(inbamFile == outbamFile){
	cerr<<"Input and output files are the same"<<endl;
	return 1;    
    }
    // if(!mapped && !unmapped){
    // 	cerr << "Please specify whether you reads are mapped or unmapped" << endl;
    // 	return 1;
    // }
    // if(mapped && unmapped){
    // 	cerr << "Please specify either mapped or unmapped but not both" << endl;
    // 	return 1;
    // }

    BamReader reader;

    if ( !reader.Open(inbamFile) ) {
    	cerr << "Could not open input BAM files." << endl;
    	return 1;
    }


    vector<RefData>  testRefData=reader.GetReferenceData();
    SamHeader header = reader.GetHeader();
    string pID          = "decrQualDeaminated";   
    string pName        = "decrQualDeaminated";   
    string pCommandLine = "";
    for(int i=0;i<(argc);i++){
        pCommandLine += (string(argv[i])+" ");
    }
    putProgramInHeader(&header,pID,pName,pCommandLine);


    const RefVector references = reader.GetReferenceData();

    BamWriter writer;
    if ( !writer.Open(outbamFile, header, references) ) {
	cerr << "Could not open output BAM file" << endl;
	return 1;
    }

    BamAlignment al;
    // BamAlignment al2;
    // bool al2Null=true;
    
    while ( reader.GetNextAlignment(al) ) {

	    if(al.IsPaired() ){  

		if(al.IsFirstMate() ){ //5' end, need to check first base only
		    if(al.IsReverseStrand()){ //
			if(!al.IsMapped()){
			    cerr << "Cannot have reverse complemented unmapped reads :" <<al.Name<< endl;
			    //return 1;
			}
			int indexToCheck;

			//5' of first mate reversed
			indexToCheck=al.QueryBases.length()-1;
			for(int i=0;i<bpToDecrease5;i++){			
			    if(toupper(al.QueryBases[indexToCheck]) == 'A'){
				al.Qualities[indexToCheck]=char(offset+baseQualForDeam);
			    }
			    indexToCheck=max(indexToCheck-1,0);
			}

			
		    }else{
			int indexToCheck;
			//5' of first mate
			indexToCheck=0;
			for(int i=0;i<bpToDecrease5;i++){ //first base			
			    if(toupper(al.QueryBases[indexToCheck]) == 'T'){
				al.Qualities[indexToCheck]=char(offset+baseQualForDeam);
			    }
			    indexToCheck=min(indexToCheck+1,int(al.Qualities.size()));
			}

		    }


		}else{ //3' end, need to check last two bases only
		    if( al.IsSecondMate() ){
			if(al.IsReverseStrand()){ //
			    if(!al.IsMapped()){
				cerr << "Cannot have reverse complemented unmapped reads :" <<al.Name<< endl;
				//return 1;
			    }
			    int indexToCheck;

			    //3' of second mate reversed
			    indexToCheck=al.QueryBases.length()-1;
			    for(int i=0;i<bpToDecrease3;i++){			
				if(toupper(al.QueryBases[indexToCheck]) == 'T'){
				    al.Qualities[indexToCheck]=char(offset+baseQualForDeam);
				}
				indexToCheck=max(indexToCheck-1,0);
			    }
			    

			}else{
			    int indexToCheck;
			    
			    //3' of second mate forward
			    indexToCheck=0;
			    for(int i=0;i<bpToDecrease3;i++){ //first base			
				if(toupper(al.QueryBases[indexToCheck]) == 'A'){
				    al.Qualities[indexToCheck]=char(offset+baseQualForDeam);
				}
				indexToCheck=min(indexToCheck+1,int(al.Qualities.size()));
			    }

			}
		    }else{
			cerr << "Wrong state" << endl;
			return 1;
		    }
		}

	    }//end of paired end
	    else{//we consider single reads to have been sequenced from 5' to 3'

		if(al.IsReverseStrand()){ //need to consider 
		    if(!al.IsMapped()){
			cerr << "Cannot have reverse complemented unmapped reads :" <<al.Name<< endl;
			//return 1;
		    }

		    int indexToCheck;

		    //5' of single read reversed
		    indexToCheck=al.QueryBases.length()-1;
		    for(int i=0;i<bpToDecrease5;i++){			
			if(toupper(al.QueryBases[indexToCheck]) == 'A'){
			    al.Qualities[indexToCheck]=char(offset+baseQualForDeam);
			}
			indexToCheck=max(indexToCheck-1,0);
		    }

		    //3' of single read reversed
		    indexToCheck=0;
		    for(int i=0;i<bpToDecrease3;i++){ //first base			
			if(toupper(al.QueryBases[indexToCheck]) == 'A'){
			    al.Qualities[indexToCheck]=char(offset+baseQualForDeam);
			}
			indexToCheck=min(indexToCheck+1,int(al.Qualities.size()));
		    }

		    

		}else{

		    int indexToCheck;
		    
		    //5' of single read
		    indexToCheck=0;
		    for(int i=0;i<bpToDecrease5;i++){ //first base			
			if(toupper(al.QueryBases[indexToCheck]) == 'T'){
			    al.Qualities[indexToCheck]=char(offset+baseQualForDeam);
			}
			indexToCheck=min(indexToCheck+1,int(al.Qualities.size()));
		    }


		    //3' of single read
		    indexToCheck=al.QueryBases.length()-1;
		    for(int i=0;i<bpToDecrease3;i++){			
			if(toupper(al.QueryBases[indexToCheck]) == 'T'){
			    al.Qualities[indexToCheck]=char(offset+baseQualForDeam);
			}
			indexToCheck=max(indexToCheck-1,0);
		    }



		}
	    }//end of single end

	    writer.SaveAlignment(al);		


    }//    while ( reader.GetNextAlignment(al) ) {

    reader.Close();
    writer.Close();

    cerr<<"Program terminated gracefully"<<endl;   
    return 0;
}
Ejemplo n.º 10
0
int main (int argc, char *argv[]) {

    if(argc != 4){
	cerr<<"This program strips the mapping information and cuts sequences"<<endl;
	cerr<<"Usage "<<argv[0]<<" [bam file in] [bam file out] [distribution]"<<endl;
	cerr<<"The distribution is one per line"<<endl;
	return 1;
    }

    string bamfiletopen  = string(argv[1]);
    string bamfiletwrite = string(argv[2]);
    string fileDist       = string(argv[3]);

    igzstream myFile;
    string line;
    vector<int> distToUse;
    myFile.open(fileDist.c_str(), ios::in);
    
    if (myFile.good()){
	while ( getline (myFile,line)){
	    distToUse.push_back(   destringify<int>(line) );
	}
	myFile.close();
    }else{
	cerr << "Unable to open file "<<fileDist<<endl;
	return 1;
    }
    cerr<<"Read "<<distToUse.size()<<" data points "<<endl;


    cerr<<"Reading "<<bamfiletopen<<" writing to "<<bamfiletwrite<<endl;

    BamReader reader;
    BamWriter writer;

    if ( !reader.Open(bamfiletopen) ) {
    	cerr << "Could not open input BAM files." << endl;
    	return 1;
    }

    SamHeader  myHeader=reader.GetHeader();
    SamProgram sp;

    string pID          = "removeTagsMapping";   
    string pName        = "removeTagsMapping";   
    string pCommandLine = "";
    for(int i=0;i<(argc);i++){
	pCommandLine += (string(argv[i])+" ");
    }
    putProgramInHeader(&myHeader,pID,pName,pCommandLine,returnGitHubVersion(string(argv[0]),"."));

    //no @SQ
    myHeader.Sequences.Clear();
    vector< RefData > 	emptyRefVector;

    if( !writer.Open(bamfiletwrite,myHeader,emptyRefVector ) ) {
    	cerr << "Could not open output BAM file  "<<bamfiletwrite << endl;
    	return 1;	
    }

    unsigned int readsTotal=0;

    BamAlignment al;
    while ( reader.GetNextAlignment(al) ) {
	//deleting tag data
	al.TagData="";

	//reset the flag
	// if(al.IsPaired()){
	//     if(al.IsFirstMate()){
	// 	al.AlignmentFlag =  flagFirstPair;
	//     }else{
	// 	al.AlignmentFlag =  flagSecondPair;
	//     }
	// }else{
	// }

	if(al.IsPaired()){
	    if(al.IsFirstMate()){
		al.Name =  al.Name+"/1";
	    }else{
		al.Name =  al.Name+"/2";
	    }
	}

	al.AlignmentFlag =  flagSingleReads;


	//no ref or positon
	al.RefID=-1;
	al.MateRefID=-1;
	al.Position=-1;
	al.MatePosition=-1;
	//no insert size
	al.InsertSize=0;
	//no cigar
	al.CigarData.clear();
	//no mapping quality 
	al.MapQuality=0;
	int length = distToUse[ randomInt(0,distToUse.size()-1) ];
	length = MIN( length, al.Length);
	al.QueryBases = al.QueryBases.substr(0,length);
	al.Qualities  = al.Qualities.substr( 0,length);


	writer.SaveAlignment(al);
	readsTotal++;
    }

    reader.Close();
    writer.Close();

    cerr<<"Program "<<argv[0]<<" terminated gracefully, looked at "<<readsTotal<<endl;

    return 0;
}