Ejemplos de BamAlignment::HasTag en C++ (Cpp)

Lenguaje de programación: C++ (Cpp)

Clase / Tipo: BamAlignment

Método / Función: HasTag

Ejemplos en hotexamples.com: 10

C++ (Cpp) BamAlignment::HasTag - 10 ejemplos encontrados. Estos son los ejemplos en C++ (Cpp) del mundo real mejor valorados de BamAlignment::HasTag extraídos de proyectos de código abierto. Puedes valorar ejemplos para ayudarnos a mejorar la calidad de los ejemplos.

Métodos usados con frecuencia

Mostrar Ocultar

IsReverseStrand(30)

IsMapped(27)

GetEndPosition(22)

IsPaired(16)

GetTag(16)

IsFirstMate(10)

IsSecondMate(10)

HasTag(10)

IsMateMapped(6)

AddTag(5)

IsDuplicate(5)

IsFailedQC(4)

IsPrimaryAlignment(4)

IsProperPair(4)

RemoveTag(4)

IsMateReverseStrand(3)

GetReadGroup(3)

SetIsDuplicate(3)

SetIsReverseStrand(3)

BuildCharData(2)

EditTag(2)

SetIsMapped(2)

getCIGAR(2)

getInsertSize(2)

getMD(2)

getMateDir(2)

GetErrorString(1)

GetTagType(1)

SetIsPaired(1)

SetIsSecondMate(1)

Ejemplo n.º 1

Mostrar archivo

Archivo: VariantProcessor.cpp Proyecto: vsbuffalo/hapcount

/*
 * Process a single alignment of the current block.
 *
 * The reference sequence is rebuilt by createReferenceSequence(), then the
 * CIGAR operations are walked to collect per-read variants through the
 * processMatchOrMismatch / processInsertion / processDeletion helpers.
 * Finally the alignment is appended to block_alignments so the block can be
 * re-processed later.
 *
 * Alignments lacking either the NM or the MD tag are reported on stderr and
 * skipped: they are not appended to block_alignments.
 *
 * Any CIGAR operation other than S, M, I or D is treated as fatal (exit(1)).
 *
 * Returns 0 for now; the start position of the alignment is still a TODO.
 */
pos_t VariantProcessor::processAlignment(const BamAlignment& alignment) {
  // Both tags are required by the steps below; bail out instead of carrying
  // on with an alignment that cannot be reconstructed.
  if (!alignment.HasTag("NM") || !alignment.HasTag("MD")) {
    std::cerr << "error: BamAlignment '" << alignment.Name <<
      "' does not have either NM or MD tags" << std::endl;
    return 0; // TODO(vsbuffalo)
  }

  // Reconstruct reference sequence using MD tags
  string refseq = createReferenceSequence(alignment);

  // With reconstructed reference sequence and query sequence, look
  // for variants. It's a bit roundabout to reconstruct reference from
  // MD, then use it to find variants (already in MD) but keeping
  // state between CIGAR and MD is tricky. This also is a good
  // validation; variants found must match the number of variants in
  // CIGAR/MD.
  vector<VariantPtr> read_variants;
  const vector<CigarOp>& cigar = alignment.CigarData;

  // Offsets into the reconstructed reference and into the read, advanced
  // according to which sequence each CIGAR operation consumes.
  int refpos = 0, readpos = 0;

  for (vector<CigarOp>::const_iterator op = cigar.begin(); op != cigar.end(); ++op) {
    switch (op->Type) {
    case 'S':
      // soft clip: consumes read bases only
      readpos += op->Length;
      break;
    case 'M':
      // match or SNP: consumes both read and reference
      processMatchOrMismatch(alignment, read_variants, op->Length, refseq, refpos, readpos);
      readpos += op->Length;
      refpos += op->Length;
      break;
    case 'I':
      // insertion w.r.t. reference: consumes read bases only
      processInsertion(alignment, read_variants, op->Length, refseq, refpos, readpos);
      readpos += op->Length;
      break;
    case 'D':
      // deletion w.r.t. reference: skip ref length
      processDeletion(alignment, read_variants, op->Length, refseq, refpos, readpos);
      refpos += op->Length;
      break;
    default:
      cerr << "error: unidentified CIGAR type: " << op->Type << endl;
      exit(1);
    }
  }

  // Add to alignments list
  block_alignments.push_back(alignment);
  return 0; // TODO(vsbuffalo)
}

Ejemplo n.º 2

Mostrar archivo

Archivo: DataStatisticsTool.cpp Proyecto: ShujiaHuang/PyroTools

int DataStatisticsTool::Execute()
{
    // iterate over reads in BAM file(s)
    BamAlignment alignObj;
    while(bamReader.GetNextAlignment(alignObj))
    {
        if (alignObj.IsDuplicate()) continue;
        if (alignObj.IsFailedQC()) continue;
        if (!alignObj.IsMapped()) continue;
        if (!alignObj.IsPrimaryAlignment()) continue;
        if (alignObj.IsPaired() && !alignObj.IsProperPair()) continue;
        if (alignObj.IsPaired() && !alignObj.IsMateMapped()) continue;
        if (!alignObj.HasTag("MD")) continue;

//        // debug
//        GenericBamAlignmentTools::printBamAlignmentCigar(alignObj);
//        GenericBamAlignmentTools::printBamAlignmentMD(alignObj);

        // shift InDel
        GenericBamAlignmentTools::leftShiftInDel(alignObj);

//        // debug
//        GenericBamAlignmentTools::printBamAlignmentCigar(alignObj);
//        GenericBamAlignmentTools::printBamAlignmentMD(alignObj);

        // get the alignment sequences
        string alignRead;
        string alignGenome;
        GenericBamAlignmentTools::getAlignmentSequences(alignObj, alignRead, alignGenome);

        // update the statistics
        statistics.update(alignRead, alignGenome);
    }


    // print to screen
    cout << statistics << endl;
//    statistics.printMatchMismatch();

    // close BAM reader
    bamReader.Close();

    // close Fasta
    genomeFasta.Close();

    return 1;
}

Ejemplo n.º 3

Mostrar archivo

Archivo: editDist.cpp Proyecto: grenaud/libbam

int main (int argc, char *argv[]) {

     if( (argc== 1) ||
    	(argc== 2 && string(argv[1]) == "-h") ||
    	(argc== 2 && string(argv[1]) == "-help") ||
    	(argc== 2 && string(argv[1]) == "--help") ){
	 cout<<"Usage:editDist [in bam]"<<endl<<"this program returns the NM field of all aligned reads"<<endl;
	 return 1;
     }

     string bamfiletopen = string(argv[1]);
     // cout<<bamfiletopen<<endl;
     BamReader reader;
     // cout<<"ok"<<endl;
     if ( !reader.Open(bamfiletopen) ) {
	 cerr << "Could not open input BAM files." << endl;
	 return 1;
     }

     BamAlignment al;
     // cout<<"ok"<<endl;
     while ( reader.GetNextAlignment(al) ) {
	 // cout<<al.Name<<endl;
	 if(!al.IsMapped())
	     continue;

	 if(al.HasTag("NM") ){
	     int editDist;
	     if(al.GetTag("NM",editDist) ){
		 cout<<editDist<<endl;
	     }else{
		 cerr<<"Cannot retrieve NM field for "<<al.Name<<endl;
		 return 1;
	     }
	 }else{
	     cerr<<"Warning: read "<<al.Name<<" is aligned but has no NM field"<<endl;
	 }

		    

     } //while al

     reader.Close();

     return 0;
}

Ejemplo n.º 4

Mostrar archivo

Archivo: SV_SplitRead.cpp Proyecto: gkno/lumpy-sv

//{{{ void process_intra_chrom_split(const BamAlignment &curr,
void
SV_SplitRead::
process_intra_chrom_split(const BamAlignment &curr,
                          const RefVector refs,
                          BamWriter &inter_chrom_reads,
                          map<string, BamAlignment> &mapped_splits,
                          UCSCBins<SV_BreakPoint*> &r_bin,
                          int weight,
                          int id,
                          int sample_id,
                          SV_SplitReadReader *_reader)
{

    if (mapped_splits.find(curr.Name) == mapped_splits.end()) {
        uint32_t clipped = count_clipped(curr.CigarData);
        if ( curr.HasTag("YP") == true) {
            uint32_t t;
            curr.GetTag("YP", t);
            if (t == 2)
                mapped_splits[curr.Name] = curr;
        }
        else if (clipped >= _reader->min_clip)
            mapped_splits[curr.Name] = curr;
    } else {
        if ( mapped_splits[curr.Name].RefID == curr.RefID ) {
            try {
                SV_SplitRead *new_split_read =
                    new SV_SplitRead(mapped_splits[curr.Name],
                                     curr,
                                     refs,
                                     weight,
                                     id,
                                     sample_id,
                                     _reader);

                SV_BreakPoint *new_bp = NULL;
                if (new_split_read->is_sane()) {
                    new_bp = new_split_read->get_bp();

                    if (new_bp != NULL) {
                        new_bp->cluster(r_bin);
                    } else {
                        cerr << "Alignment name:" << curr.Name << endl;
                        free(new_split_read);
                    }
                } else
                    free(new_split_read);

            } catch (int) {
                cerr << "Error creating split read: " << endl;
            }

        } else {
            BamAlignment al1 = curr;
            BamAlignment al2 = mapped_splits[curr.Name];

            al1.MateRefID = al2.RefID;
            al2.MateRefID = al1.RefID;

            al1.MatePosition = al2.Position;
            al2.MatePosition = al1.Position;

            string x = _reader->get_source_file_name();

            al1.AddTag("LS","Z",x);
            al2.AddTag("LS","Z",x);

            inter_chrom_reads.SaveAlignment(al1);
            inter_chrom_reads.SaveAlignment(al2);
        }
        mapped_splits.erase(curr.Name);
    }
}

Ejemplo n.º 5

Mostrar archivo

Archivo: splitByRG.cpp Proyecto: grenaud/aLib

int main (int argc, char *argv[]) {

     if( (argc!= 3) ||
    	(argc== 2 && string(argv[1]) == "-h") ||
    	(argc== 2 && string(argv[1]) == "-help") ||
    	(argc== 2 && string(argv[1]) == "--help") ){
	 cerr<<"Usage:splitByRG [in bam] [out prefix]"<<endl<<"this program creates one bam file per RG in the with the outprefix\nFor example splitByRG in.bam out will create\nout.rg1.bam\nout.rg2.bam\n"<<endl;
    	return 1;
    }


     string bamfiletopen = string(argv[1]);
     // if(!strEndsWith(bamfiletopen,".bam")){

     // }
     string bamDirOutPrefix    = string(argv[2]);
     map<string,BamWriter *> rg2BamWriter;
     
     // if(!isDirectory(bamDirOut)){
     // 	 cerr<<"ERROR: the out directory does not exist"<<endl;
     // 	return 1;
     // }

     BamReader reader;

     if ( !reader.Open(bamfiletopen) ) {
    	cerr << "Could not open input BAM files." << endl;
    	return 1;
     }

    SamHeader header = reader.GetHeader();
    const RefVector references = reader.GetReferenceData();
    vector<RefData>  refData=reader.GetReferenceData();
    string pID          = "splitByRG";   
    string pName        = "splitByRG";   
    string pCommandLine = "";
    for(int i=0;i<(argc);i++){
        pCommandLine += (string(argv[i])+" ");
    }
    putProgramInHeader(&header,pID,pName,pCommandLine,returnGitHubVersion(string(argv[0]),".."));


    SamReadGroupDictionary 	srgd=header.ReadGroups;
    for(SamReadGroupConstIterator srgci=srgd.ConstBegin();
	srgci<srgd.ConstEnd();
	srgci++){
	//cout<<*srgci<<endl;
	const SamReadGroup rg = (*srgci);
	//cout<<rg.ID<<endl;
	rg2BamWriter[rg.ID] = new  BamWriter();
	rg2BamWriter[rg.ID]->Open(bamDirOutPrefix+"."+rg.ID+".bam",header,references); 
    }



    BamAlignment al;
    unsigned int total=0;
    while ( reader.GetNextAlignment(al) ) {

	// al.SetIsFailedQC(false);
	// writer.SaveAlignment(al);
	// if(al.IsMapped () ){
	//     if(rg2BamWriter.find(refData[al.RefID].RefName) == rg2BamWriter.end()){ //new
	// 	rg2BamWriter[refData[al.RefID].RefName] = new  BamWriter();
	// 	if ( !rg2BamWriter[refData[al.RefID].RefName]->Open(bamDirOutPrefix+"."+refData[al.RefID].RefName+".bam",header,references) ) {
	// 	    cerr     << "Could not open output BAM file "<< bamDirOutPrefix<<"."<<refData[al.RefID].RefName<<".bam" << endl;
	// 	    return 1;
	// 	}
	
	//     }else{
	// 	rg2BamWriter[refData[al.RefID].RefName]->SaveAlignment(al);
	//     }
	// }else{
	//     unmapped.SaveAlignment(al);
	// }
	if(al.HasTag("RG")){
	    string rgTag;
	    al.GetTag("RG",rgTag);
	    //cout<<rgTag<<endl;
	    if(rg2BamWriter.find(rgTag) == rg2BamWriter.end()){ //new
		cerr<<"Found new RG "<<rgTag<<endl;
		rg2BamWriter[rgTag] = new  BamWriter();
	 	if ( !rg2BamWriter[rgTag]->Open(bamDirOutPrefix+"."+rgTag+".bam",header,references) ) {
	 	    cerr     << "Could not open output BAM file "<< bamDirOutPrefix<<"."<<rgTag<<".bam" << endl;
	 	    return 1;
	 	}
		rg2BamWriter[rgTag]->SaveAlignment(al);	    	   
	    }else{
		rg2BamWriter[rgTag]->SaveAlignment(al);	    	   
	    }
	}else{
	    string rgTag="unknown";	    
	    //cout<<rgTag<<endl;
	    if(rg2BamWriter.find(rgTag) == rg2BamWriter.end()){ //new
		cerr<<"Found new RG "<<rgTag<<endl;
		rg2BamWriter[rgTag] = new  BamWriter();
	 	if ( !rg2BamWriter[rgTag]->Open(bamDirOutPrefix+"."+rgTag+".bam",header,references) ) {
	 	    cerr     << "Could not open output BAM file "<< bamDirOutPrefix<<"."<<rgTag<<".bam" << endl;
	 	    return 1;
	 	}
		rg2BamWriter[rgTag]->SaveAlignment(al);	    	   
	    }else{
		rg2BamWriter[rgTag]->SaveAlignment(al);	    	   
	    }

	    // cerr << "Cannot get RG tag for " << al.Name<<endl;
	    // return 1;
	}

	total++;
    } //while al

    reader.Close();
    // writer.Close();
    
    // unmapped.Close();

    map<string,BamWriter *>::iterator rg2BamWriterIt;
    for (rg2BamWriterIt =rg2BamWriter.begin(); 
	 rg2BamWriterIt!=rg2BamWriter.end(); 
	 rg2BamWriterIt++){
	rg2BamWriterIt->second->Close();
    }
    cerr<<"Wrote succesfully "<<total<<" reads"<<endl;


    return 0;
}

Ejemplo n.º 6

Mostrar archivo

Archivo: plotQualScores.cpp Proyecto: grenaud/aLib

int main (int argc, char *argv[]) {
    if( (argc== 1) ||
	(argc== 2 && string(argv[1]) == "-h") ||
	(argc== 2 && string(argv[1]) == "-help") ||
	(argc== 2 && string(argv[1]) == "--help") ){
	cout<<"Usage:"<<endl;
	cout<<""<<endl;
	cout<<"plotQualScore input.bam"<<endl;
	return 1;
    }

    string bamfiletopen = string(argv[1]);
    BamReader reader;

    if ( !reader.Open(bamfiletopen) ) {
    	cerr << "Could not open input BAM files." << endl;
    	return 1;
    }

    // if ( !reader.LocateIndex() ){
    // 	cerr << "warning: cannot locate index for file " << bamfiletopen<<endl;
    // 	//return 1;
    // }

    BamAlignment al;
    BamAlignment al2;
    
    bool unsurePEorSE=true;
    bool pe=true;
    int strLength=-1;
    int vecLengthToUse=-1;

    map<short,unsigned long>  ** counterA = 0;
    map<short,unsigned long>  ** counterC = 0;
    map<short,unsigned long>  ** counterG = 0;
    map<short,unsigned long>  ** counterT = 0;
    
    int lengthIndex1=0;
    int lengthIndex2=0;
    string seqInd1;
    string seqInd2;
    string qualInd1;
    string qualInd2;
    int offsetInd2;

    while ( reader.GetNextAlignment(al) ) {
	if(unsurePEorSE){
	    strLength=al.QueryBases.length();
	    if(al.IsPaired()){
		pe=true;
		vecLengthToUse=2*strLength;		
	    }else{
		pe=false;
		vecLengthToUse=strLength;
	    }
	    string index1;
	    string index2;
	
	    if(al.HasTag("XI")){
		al.GetTag("XI",index1);
		vecLengthToUse+=index1.length();
		lengthIndex1=index1.length();
	    }

	    if(al.HasTag("XJ")){
		al.GetTag("XJ",index2);
		vecLengthToUse+=index2.length();
		lengthIndex2=index2.length();
	    }

	    counterA     = new map<short,unsigned long>  * [vecLengthToUse];
	    counterC     = new map<short,unsigned long>  * [vecLengthToUse];
	    counterG     = new map<short,unsigned long>  * [vecLengthToUse];
	    counterT     = new map<short,unsigned long>  * [vecLengthToUse];
	    for(int i=0;i<vecLengthToUse;i++){
		counterA[i]=new map<short,unsigned long>  ();
		counterC[i]=new map<short,unsigned long>  ();
		counterG[i]=new map<short,unsigned long>  ();
		counterT[i]=new map<short,unsigned long>  ();
		for(short k=minQualScore;k<=maxQualScore;k++){
		    (*counterA[i])[k]=0;
		    (*counterC[i])[k]=0;
		    (*counterG[i])[k]=0;
		    (*counterT[i])[k]=0;
		}
	    }
	    unsurePEorSE=false;
	}else{
	    if(pe  &&
	       !al.IsPaired()){
		cerr << "Cannot have unpaired reads in PE mode" << endl;
		return 1;
	    }

	    if(!pe  &&
	       al.IsPaired()){
		cerr << "Cannot have unpaired reads in SE mode" << endl;
		return 1;
	    }
	}
	
	if(al.QueryBases.length() !=  al.Qualities.length()){
	    cerr << "Cannot have different lengths for sequence and quality" << endl;
	    return 1;
	}
	if(int(al.QueryBases.length()) !=  strLength){
	    cerr << "Cannot have different lengths for sequence and quality" << endl;
	    return 1;
	}

	if(pe){
	    if(al.IsFirstMate()){
		reader.GetNextAlignment(al2);
		if(al2.QueryBases.length() !=  al2.Qualities.length()){
		    cerr << "Cannot have different lengths for sequence and quality" << endl;
		    return 1;
		}

	    }else{
		cerr << "First read should be the first mate" << endl;
		return 1;	    
	    }
	}



	//cycle
	for(unsigned int i=0;i<al.QueryBases.length();i++){
	    short x=(short(al.Qualities[i])-qualOffset);
	    if(al.QueryBases[i] == 'A'){
	    	(*counterA[i])[x]++;
	    }
	    if(al.QueryBases[i] == 'C'){
	    	(*counterC[i])[x]++;
	    }
	    if(al.QueryBases[i] == 'G'){
	    	(*counterG[i])[x]++;
	    }
	    if(al.QueryBases[i] == 'T'){
	    	(*counterT[i])[x]++;
	    }
	}

	//The indices for al and al2 should hopefully be the same 
	if(lengthIndex1>0){
	    al.GetTag("XI",seqInd1);
	    al.GetTag("YI",qualInd1);
	    int j;

	    for(int i=0;i<lengthIndex1;i++){
		j=i+al.QueryBases.length();
		short x=(short(qualInd1[i])-qualOffset);
		if(seqInd1[i] == 'A'){
		    (*counterA[j])[x]++;
		}
		if(seqInd1[i] == 'C'){
		    (*counterC[j])[x]++;
		}
		if(seqInd1[i] == 'G'){
		    (*counterG[j])[x]++;
		}
		if(seqInd1[i] == 'T'){
		    (*counterT[j])[x]++;
		}
	    }
	}

	if(pe){
	    offsetInd2=al.QueryBases.length()+lengthIndex1+al2.QueryBases.length();
	    int j;
	    for(unsigned int i=0;i<al2.QueryBases.length();i++){
		j=i+al.QueryBases.length()+lengthIndex1;
		short x=(short(al2.Qualities[i])-qualOffset);
		if(al2.QueryBases[i] == 'A'){
		    (*counterA[j])[x]++;
		}
		if(al2.QueryBases[i] == 'C'){
		    (*counterC[j])[x]++;
		}
		if(al2.QueryBases[i] == 'G'){
		    (*counterG[j])[x]++;
		}
		if(al2.QueryBases[i] == 'T'){
		    (*counterT[j])[x]++;
		}
	    }
	}else{
	    offsetInd2=al.QueryBases.length()+lengthIndex1;
	}

	//The indices for al and al2 should hopefully be the same 
	if(lengthIndex2>0){
	    al.GetTag("XJ",seqInd2);
	    al.GetTag("YJ",qualInd2);
	    int j;

	    for(int i=0;i<lengthIndex2;i++){
		j=offsetInd2+i;
		short x=(short(qualInd2[i])-qualOffset);
		if(seqInd2[i] == 'A'){
		    (*counterA[j])[x]++;
		}
		if(seqInd2[i] == 'C'){
		    (*counterC[j])[x]++;
		}
		if(seqInd2[i] == 'G'){
		    (*counterG[j])[x]++;
		}
		if(seqInd2[i] == 'T'){
		    (*counterT[j])[x]++;
		}
	    }
	}

    }
    reader.Close();

    cout<<"cycle\t"<<"nuc\t";
    for(short k=minQualScore;k<maxQualScore;k++){
	cout<<k<<"\t";
    }
    cout<<maxQualScore<<endl;

    for(int i=0;i<vecLengthToUse;i++){
	cout<<(i+1)<<"\t";
	cout<<"A\t";
	for(short k=minQualScore;k<maxQualScore;k++){
	    cout<<(*counterA[i])[k]<<"\t";
	}
	cout<<(*counterA[i])[maxQualScore]<<endl;
	cout<<(i+1)<<"\t";
	cout<<"C\t";
	for(short k=minQualScore;k<maxQualScore;k++){
	    cout<<(*counterC[i])[k]<<"\t";
	}
	cout<<(*counterC[i])[maxQualScore]<<endl;
	cout<<(i+1)<<"\t";
	cout<<"G\t";
	for(short k=minQualScore;k<maxQualScore;k++){
	    cout<<(*counterG[i])[k]<<"\t";
	}
	cout<<(*counterG[i])[maxQualScore]<<endl;
	cout<<(i+1)<<"\t";
	cout<<"T\t";
	for(short k=minQualScore;k<maxQualScore;k++){
	    cout<<(*counterT[i])[k]<<"\t";
	}
	cout<<(*counterT[i])[maxQualScore]<<endl;	
    }



    
    return 0;
}

Ejemplo n.º 7

Mostrar archivo

Archivo: leeHom.cpp Proyecto: gedankenstuecke/leeHom

// leeHom: trims adapters and merges overlapping mates, reading either an
// unaligned BAM file (mates consecutive) or one/two fastq files.
//
// Every argument but the last is parsed as an option; the last one is taken
// as the input BAM file name. In fastq mode (-fq1/-fq2/-fqo) results go to
// "<prefix>*.fq.gz" files, failed reads to the matching "*.fail.fq.gz" files.
// In BAM mode results are written to the file given with -o.
//
// A one-line tally is always printed to stderr; --log additionally writes the
// multi-line report to a file.
//
// Exit status: 0 on success, 1 on usage or any error.
int main (int argc, char *argv[]) {

    bool produceUnCompressedBAM=false;
    bool verbose=false;
    bool ancientDNA=false;
    bool keepOrig=false;

    string adapter_F=options_adapter_F_BAM;
    string adapter_S=options_adapter_S_BAM;
    string adapter_chimera=options_adapter_chimera_BAM;
    string key="";
    bool allowMissing=false;
    int trimCutoff=1;

    bool allowAligned=false;
    bool printLog=false;
    string logFileName;

    BamReader reader;
    BamWriter writer;

    string bamFile;
    string bamFileOUT="";

    string key1;
    string key2;

    bool useDist=false;
    double location=-1.0;   // -1.0 means "not specified"
    double scale   =-1.0;   // -1.0 means "not specified"

    bool fastqFormat=false;
    string fastqfile1   = "";
    string fastqfile2   = "";
    string fastqoutfile = "";
    bool singleEndModeFQ=true;

    const string usage=string(string(argv[0])+

                              " [options] BAMfile"+"\n"+
                              "\nThis program takes an unaligned BAM where mates are consecutive\nor fastq files and trims and merges reads\n"+

                              "\n\tYou can specify a unaligned bam file or one or two fastq :\n"+
                              "\t\t"+"-fq1" +"\t\t"+"First fastq"+"\n"+
                              "\t\t"+"-fq2" +"\t\t"+"Second  fastq file (for paired-end)"+"\n"+
                              "\t\t"+"-fqo" +"\t\t"+"Output fastq prefix"+"\n\n"+
                              "\t"+"-o , --outfile" +"\t\t"+"Output (BAM format)."+"\n"+

                              "\t"+"-u            " +"\t\t"+"Produce uncompressed bam (good for pipe)"+"\n"+

                              "\t"+"--aligned" +"\t\t"+"Allow reads to be aligned (default "+boolStringify(allowAligned)+")"+"\n"+
                              "\t"+"-v , --verbose" +"\t\t"+"Turn all messages on (default "+boolStringify(verbose)+")"+"\n"+
                              "\t"+"--log [log file]" +"\t"+"Print a tally of merged reads to this log file (default only to stderr)"+"\n"+

                              "\n\t"+"Paired End merging/Single Read trimming  options"+"\n"+
                              "\t\t"+"You can specify either:"+"\n"+
                              "\t\t\t"+"--ancientdna"+"\t\t\t"+"ancient DNA (default "+boolStringify(ancientDNA)+")"+"\n"+
                              "\t\t"+"            "+"\t\t\t\t"+"this allows for partial overlap"+"\n"+
                              "\n\t\t"+"or if you know your size length distribution:"+"\n"+
                              "\t\t\t"+"--loc"+"\t\t\t\t"+"Location for lognormal dist. (default none)"+"\n"+
                              "\t\t\t"+"--scale"+"\t\t\t\t"+"Scale for lognormal dist. (default none)"+"\n"+
                              "\n\t\t"+"--keepOrig"+"\t\t\t\t"+"Write original reads if they are trimmed or merged  (default "+boolStringify(keepOrig)+")"+"\n"+
                              "\t\t\t\t\t\t\tSuch reads will be marked as PCR duplicates\n\n"
                              "\t\t"+"-f , --adapterFirstRead" +"\t\t\t"+"Adapter that is observed after the forward read (def. Multiplex: "+options_adapter_F_BAM.substr(0,30)+")"+"\n"+
                              "\t\t"+"-s , --adapterSecondRead" +"\t\t"+"Adapter that is observed after the reverse read (def. Multiplex: "+options_adapter_S_BAM.substr(0,30)+")"+"\n"+
                              "\t\t"+"-c , --FirstReadChimeraFilter" +"\t\t"+"If the forward read looks like this sequence, the cluster is filtered out.\n\t\t\t\t\t\t\tProvide several sequences separated by comma (def. Multiplex: "+options_adapter_chimera_BAM.substr(0,30)+")"+"\n"+
                              "\t\t"+"-k , --key"+"\t\t\t\t"+"Key sequence with which each sequence starts. Comma separate for forward and reverse reads. (default '"+key+"')"+"\n"+
                              "\t\t"+"-i , --allowMissing"+"\t\t\t"+"Allow one base in one key to be missing or wrong. (default "+boolStringify(allowMissing)+")"+"\n"+
                              "\t\t"+"-t , --trimCutoff"+"\t\t\t"+"Lowest number of adapter bases to be observed for single Read trimming (default "+stringify(trimCutoff)+")");

    if( (argc== 1) ||
        (argc== 2 && string(argv[1]) == "-h") ||
        (argc== 2 && string(argv[1]) == "-help") ||
        (argc== 2 && string(argv[1]) == "--help") ){
        cout<<"Usage:"<<endl;
        cout<<""<<endl;
        cout<<usage<<endl;
        return 1;
    }

    //
    //  OPTION PARSING
    //
    // All but the last argument; since i < argc-1, argv[i+1] always exists
    // for options that take a value.
    for(int i=1;i<(argc-1);i++){

        if(strcmp(argv[i],"-fq1") == 0 ){
            fastqfile1=string(argv[i+1]);
            fastqFormat=true;
            i++;
            continue;
        }

        if(strcmp(argv[i],"-fq2") == 0 ){
            fastqfile2=string(argv[i+1]);
            fastqFormat=true;
            singleEndModeFQ=false;
            i++;
            continue;
        }

        if(strcmp(argv[i],"-fqo") == 0 ){
            fastqoutfile=string(argv[i+1]);
            fastqFormat=true;
            i++;
            continue;
        }

        if(strcmp(argv[i],"--log") == 0 ){
            logFileName =string(argv[i+1]);
            printLog=true;
            i++;
            continue;
        }

        if(strcmp(argv[i],"-p") == 0 || strcmp(argv[i],"--PIPE") == 0 ){
            cerr<<"This version no longer works with pipe, exiting"<<endl;
            return 1;
        }

        if(strcmp(argv[i],"-u") == 0  ){
            produceUnCompressedBAM=true;
            continue;
        }

        if(strcmp(argv[i],"--aligned") == 0  ){
            allowAligned=true;
            continue;
        }

        if(strcmp(argv[i],"-o") == 0 || strcmp(argv[i],"--outfile") == 0 ){
            bamFileOUT =string(argv[i+1]);
            i++;
            continue;
        }

        if(strcmp(argv[i],"-v") == 0 || strcmp(argv[i],"--verbose") == 0 ){
            verbose=true;
            continue;
        }

        if(strcmp(argv[i],"--ancientdna") == 0 ){
            ancientDNA=true;
            continue;
        }

        if(strcmp(argv[i],"--keepOrig") == 0 ){
            keepOrig=true;
            continue;
        }

        if(strcmp(argv[i],"--loc") == 0 ){
            location =destringify<double>(argv[i+1]);
            i++;
            continue;
        }

        if(strcmp(argv[i],"--scale") == 0 ){
            scale =destringify<double>(argv[i+1]);
            i++;
            continue;
        }

        if(strcmp(argv[i],"-f") == 0 || strcmp(argv[i],"--adapterFirstRead") == 0 ){
            adapter_F =string(argv[i+1]);
            i++;
            continue;
        }

        if(strcmp(argv[i],"-s") == 0 || strcmp(argv[i],"--adapterSecondRead") == 0 ){
            adapter_S =string(argv[i+1]);
            i++;
            continue;
        }

        if(strcmp(argv[i],"-c") == 0 || strcmp(argv[i],"--FirstReadChimeraFilter") == 0 ){
            adapter_chimera =string(argv[i+1]);
            i++;
            continue;
        }

        // "--key" is the spelling shown in the usage text; "--keys" is kept
        // for backward compatibility.
        if(strcmp(argv[i],"-k") == 0 || strcmp(argv[i],"--key") == 0 || strcmp(argv[i],"--keys") == 0 ){
            key =string(argv[i+1]);
            i++;
            continue;
        }

        if(strcmp(argv[i],"-i") == 0 || strcmp(argv[i],"--allowMissing") == 0 ){
            allowMissing=true;
            continue;
        }

        if(strcmp(argv[i],"-t") == 0 || strcmp(argv[i],"--trimCutoff") == 0 ){
            trimCutoff=atoi(argv[i+1]);
            i++;
            continue;
        }

        cerr<<"Unknown option "<<argv[i] <<" exiting"<<endl;
        return 1;
    }

    bamFile=argv[argc-1];

    // --loc and --scale only make sense together.
    if( (location != -1.0 && scale == -1.0) ||
        (location == -1.0 && scale != -1.0) ){
        cerr<<"Cannot specify --loc without specifying --scale"<<endl;
        return 1;
    }

    if( (location != -1.0 && scale != -1.0) ){
        useDist=true;

        if(ancientDNA){
            cerr<<"Cannot specify --loc/--scale and --ancientdna"<<endl;
            return 1;
        }
    }

    // Split the key option into the forward and reverse keys. This has to
    // happen before the MergeTrimReads object is built from key1/key2,
    // otherwise the -k option is silently ignored.
    if(key != ""){
        const size_t found=key.find(",");
        if (found == string::npos){ //single end reads
            key1=key;
            key2="";
        } else{                     //paired-end
            key1=key.substr(0,found);
            key2=key.substr(found+1);
        }
    }

    MergeTrimReads mtr (adapter_F,adapter_S,adapter_chimera,
                        key1,key2,
                        trimCutoff,allowMissing,ancientDNA,location,scale,useDist);

    fqwriters onereadgroup;

    if(fastqFormat){
        //
        //  FASTQ
        //

        if( bamFileOUT != ""  || produceUnCompressedBAM || allowAligned){
            cerr<<"ERROR : Cannot specify options like -o, -u or --aligned for fastq"<<endl;
            return 1;
        }

        if(fastqfile1 == ""){
            cerr<<"ERROR : Must specify as least the first file for fastq"<<endl;
            return 1;
        }

        FastQParser * fqp1 = 0;
        FastQParser * fqp2 = 0;   // only used in paired-end mode

        if(singleEndModeFQ){
            fqp1 = new FastQParser (fastqfile1);

            string outdirs   = fastqoutfile+".fq.gz";
            string outdirsf  = fastqoutfile+".fail.fq.gz";

            onereadgroup.single.open(outdirs.c_str(), ios::out);
            onereadgroup.singlef.open(outdirsf.c_str(), ios::out);

            if(!onereadgroup.single.good()){       cerr<<"Cannot write to file "<<outdirs<<endl; return 1; }
            if(!onereadgroup.singlef.good()){      cerr<<"Cannot write to file "<<outdirsf<<endl; return 1; }

        }else{
            fqp1 = new FastQParser (fastqfile1);
            fqp2 = new FastQParser (fastqfile2);

            string outdirs   = fastqoutfile+".fq.gz";
            string outdir1   = fastqoutfile+"_r1.fq.gz";
            string outdir2   = fastqoutfile+"_r2.fq.gz";

            string outdirsf  = fastqoutfile+".fail.fq.gz";
            string outdir1f  = fastqoutfile+"_r1.fail.fq.gz";
            string outdir2f  = fastqoutfile+"_r2.fail.fq.gz";

            onereadgroup.single.open(outdirs.c_str(), ios::out);
            onereadgroup.pairr1.open(outdir1.c_str(), ios::out);
            onereadgroup.pairr2.open(outdir2.c_str(), ios::out);

            onereadgroup.singlef.open(outdirsf.c_str(), ios::out);
            onereadgroup.pairr1f.open(outdir1f.c_str(), ios::out);
            onereadgroup.pairr2f.open(outdir2f.c_str(), ios::out);

            if(!onereadgroup.single.good()){       cerr<<"Cannot write to file "<<outdirs<<endl; return 1; }
            if(!onereadgroup.pairr1.good()){       cerr<<"Cannot write to file "<<outdir1<<endl; return 1; }
            if(!onereadgroup.pairr2.good()){       cerr<<"Cannot write to file "<<outdir2<<endl; return 1; }

            if(!onereadgroup.singlef.good()){      cerr<<"Cannot write to file "<<outdirsf<<endl; return 1; }
            if(!onereadgroup.pairr1f.good()){      cerr<<"Cannot write to file "<<outdir1f<<endl; return 1; }
            if(!onereadgroup.pairr2f.good()){      cerr<<"Cannot write to file "<<outdir2f<<endl; return 1; }

        }

        while(fqp1->hasData()){

            FastQObj * fo1=fqp1->getData();
            // The read name is the first space-delimited token of the ID line.
            vector<string> def1=allTokens( *(fo1->getID()), ' '  );
            string def1s=def1[0];

            if(!singleEndModeFQ){
                //
                //  PAIRED END
                //
                if(!fqp2->hasData()){
                    cerr << "ERROR: Discrepency between fastq files at record " <<  *(fo1->getID()) <<endl;
                    return 1;
                }

                FastQObj * fo2=fqp2->getData();
                vector<string> def2=allTokens( *(fo2->getID()), ' ' );
                string def2s=def2[0];

                // Strip the /1 and /2 suffixes and the leading @ so that the
                // two names can be compared.
                if(strEndsWith(def1s,"/1")){
                    def1s=def1s.substr(0,def1s.size()-2);
                }
                if(strEndsWith(def2s,"/2")){
                    def2s=def2s.substr(0,def2s.size()-2);
                }

                if(strBeginsWith(def1s,"@")){
                    def1s=def1s.substr(1,def1s.size()-1);
                }
                if(strBeginsWith(def2s,"@")){
                    def2s=def2s.substr(1,def2s.size()-1);
                }

                if(def1s != def2s){
                    cerr << "ERROR: Discrepency between fastq files, different names " << *(fo1->getID()) <<" and "<< *(fo2->getID()) <<endl;
                    return 1;
                }

                merged result=  mtr.process_PE(*(fo1->getSeq()),*(fo1->getQual()),
                                               *(fo2->getSeq()),*(fo2->getQual()));

                mtr.incrementCountall();

                if(result.code != ' '){ //keys or chimeras

                    if(result.code == 'K'){
                        mtr.incrementCountfkey();
                    }else{
                        if(result.code == 'D'){
                            mtr.incrementCountchimera();
                        }else{
                            cerr << "leehom: Wrong return code =\""<<result.code<<"\""<<endl;
                            exit(1);
                        }
                    }

                    // Failed pairs go to the *.fail.fq.gz files.
                    onereadgroup.pairr2f<<"@"<<def2s<<"/2" <<endl <<*(fo2->getSeq())<<endl<<"+"<<endl <<*(fo2->getQual())<<endl;
                    onereadgroup.pairr1f<<"@"<<def1s<<"/1" <<endl <<*(fo1->getSeq())<<endl<<"+"<<endl <<*(fo1->getQual())<<endl;
                    continue;

                }else{
                    if(result.sequence != ""){ //new sequence
                        onereadgroup.single<<"@"<<def1s<<"" <<endl << result.sequence<<endl<<"+"<<endl <<result.quality<<endl;

                        // A merged sequence longer than both mates counts as
                        // a partial-overlap merge.
                        if( result.sequence.length() > max(fo1->getSeq()->length(),fo2->getSeq()->length()) ){
                            mtr.incrementCountmergedoverlap();
                        }else{
                            mtr.incrementCountmerged();
                        }

                    }else{ //keep as is
                        mtr.incrementCountnothing();

                        onereadgroup.pairr2<<"@"<<def2s<<"/2" <<endl <<*(fo2->getSeq())<<endl<<"+"<<endl <<*(fo2->getQual())<<endl;
                        onereadgroup.pairr1<<"@"<<def1s<<"/1" <<endl <<*(fo1->getSeq())<<endl<<"+"<<endl <<*(fo1->getQual())<<endl;
                    }
                }

            }else{
                //
                //  SINGLE END
                //
                merged result=mtr.process_SR(*(fo1->getSeq()),*(fo1->getQual()));
                mtr.incrementCountall();

                if(result.code != ' '){ //either chimera or missing key

                    if(result.code == 'K'){
                        mtr.incrementCountfkey();
                    }else{
                        if(result.code == 'D'){
                            mtr.incrementCountchimera();
                        }else{
                            cerr << "leehom: Wrong return code =\""<<result.code<<"\""<<endl;
                            exit(1);
                        }
                    }

                    onereadgroup.singlef<<""<<*(fo1->getID())<<"" <<endl << *(fo1->getSeq())<<endl<<"+"<<endl <<*(fo1->getQual())<<endl;
                    continue;
                }

                if(result.sequence != ""){ //new sequence
                    mtr.incrementCounttrimmed();
                    onereadgroup.single<<""<<*(fo1->getID())<<"" <<endl << result.sequence<<endl<<"+"<<endl <<result.quality<<endl;
                }else{
                    mtr.incrementCountnothing();
                    onereadgroup.single<<""<<*(fo1->getID())<<"" <<endl << *(fo1->getSeq())<<endl<<"+"<<endl <<*(fo1->getQual())<<endl;
                }

            }

        }

        delete fqp1;
        if(!singleEndModeFQ){
            delete fqp2;
        }

        if(singleEndModeFQ){

            onereadgroup.single.close();
            onereadgroup.singlef.close();

        }else{
            onereadgroup.single.close();
            onereadgroup.pairr1.close();
            onereadgroup.pairr2.close();

            onereadgroup.singlef.close();
            onereadgroup.pairr1f.close();
            onereadgroup.pairr2f.close();
        }

        //fastq
    }else{
        //
        //  BAM
        //

        if( bamFileOUT == ""  ){
            cerr<<"The output must be a be specified, exiting"<<endl;
            return 1;
        }

        if ( !reader.Open(bamFile) ) {
            cerr << "Could not open input BAM file  "<<bamFile << endl;
            return 1;
        }
        SamHeader header = reader.GetHeader();

        // Record this invocation in the output header.
        string pID          = "mergeTrimReadsBAM";
        string pName        = "mergeTrimReadsBAM";
        string pCommandLine = "";
        for(int i=0;i<(argc);i++){
            pCommandLine += (string(argv[i])+" ");
        }
        putProgramInHeader(&header,pID,pName,pCommandLine,returnGitHubVersion(string(argv[0]),".."));

        const RefVector references = reader.GetReferenceData();
        //we will not call bgzip with full compression, good for piping into another program to
        //lessen the load on the CPU
        if(produceUnCompressedBAM)
            writer.SetCompressionMode(BamWriter::Uncompressed);

        if ( !writer.Open(bamFileOUT,header,references) ) {
            cerr << "Could not open output BAM file "<<bamFileOUT << endl;
            return 1;
        }

        //Up to the user to be sure that a sequence is followed by his mate

        BamAlignment al;
        BamAlignment al2;       // holds the first-seen mate of a pair
        bool al2Null=true;      // true when no mate is waiting in al2

        while ( reader.GetNextAlignment(al) ) {

            if(al.IsMapped() || al.HasTag("NM") || al.HasTag("MD")  ){
                if(!allowAligned){
                    cerr << "Reads should not be aligned" << endl;
                    return 1;
                }else{
                    //should we remove tags ?
                }
            }

            // First record of a pair: park it and wait for its mate.
            if(al.IsPaired() &&
               al2Null ){
                al2=al;
                al2Null=false;
                continue;
            }

            if(al.IsPaired()){
                //
                //  PAIRED END (al2 holds the previously parked mate)
                //
                bool  result =  mtr.processPair(al,al2);

                if( result ){//was merged
                    BamAlignment orig;
                    BamAlignment orig2;

                    if(keepOrig){
                        orig2 = al2;
                        orig  = al;
                    }

                    writer.SaveAlignment(al);

                    if(keepOrig){
                        orig.SetIsDuplicate(true);
                        orig2.SetIsDuplicate(true);
                        writer.SaveAlignment(orig2);
                        writer.SaveAlignment(orig);
                    }

                    //the second record is empty
                }else{
                    //keep the sequences as pairs

                    writer.SaveAlignment(al2);
                    writer.SaveAlignment(al);
                }

            }else{
                //
                //  SINGLE END
                //
                BamAlignment orig;
                if(keepOrig){
                    orig =al;
                }
                mtr.processSingle(al);

                if(keepOrig){
                    //write duplicate only when the read was actually changed
                    if(orig.QueryBases.length()  != al.QueryBases.length()){
                        orig.SetIsDuplicate(true);
                        writer.SaveAlignment(orig);
                    }
                }
                writer.SaveAlignment(al);

            } //end single end
            al2Null=true;

        } //while al
        reader.Close();
        writer.Close();

    } //else BAM

    cerr <<mtr.reportSingleLine()<<endl;

    if(printLog){
        ofstream fileLog;
        fileLog.open(logFileName.c_str());

        if (fileLog.is_open()){
            fileLog <<mtr.reportMultipleLines() <<endl;

        }else{
            cerr << "Unable to print to file "<<logFileName<<endl;
        }
        fileLog.close();
    }
    return 0;
}

Ejemplo n.º 8

Mostrar archivo

Archivo: splitByRG.cpp Proyecto: grenaud/aLib

int main (int argc, char *argv[]) {

     if( (argc!= 4 && argc !=5 && argc !=6) ||
    	(argc== 2 && string(argv[1]) == "-h") ||
    	(argc== 2 && string(argv[1]) == "-help") ||
    	(argc== 2 && string(argv[1]) == "--help") ){
	 cerr<<"Usage:splitByRG [in bam] [rg Tally] [out prefix] (optional target)"<<endl<<"this program will subsample a BAM file per read group for a certain target\nFor example splitByRG in.bam tally.txt out will create\nout.rg1.bam\nout.rg2.bam\n"<<endl;
    	return 1;
    }


     string bamfiletopen      = string(argv[1]);
     string rgTally           = string(argv[2]);
     string bamDirOutPrefix   = string(argv[3]);
     
     int target            =  200000;
     int maxTarget         = 1000000;

     if(argc==5){
	 target    = destringify<int> ( string(argv[4]) );	 
     }

     if(argc==6){
	 target    = destringify<int> ( string(argv[4]) );	 
	 maxTarget = destringify<int> ( string(argv[5]) );	 
     }


     cerr<<"minimum fragments:\t"<<target<<endl;
     cerr<<"target  fragments:\t"<<maxTarget<<endl;

     string line;
     ifstream myFileTally;
     map<string,double> rg2Fraction;

     myFileTally.open(rgTally.c_str(), ios::in);
     cerr<<"Retained groups:\n"<<endl;
     cerr<<"RG\t#mapped\tfraction retained"<<endl;
     cerr<<"-----------------------------------"<<endl;

     if (myFileTally.is_open()){
	 while ( getline (myFileTally,line)){
	     vector<string> tokens = allTokens(line,'\t');
	     if(tokens.size() > 6)
		 if( tokens[1] == "pass" && 
		    (tokens[0] != "\"\""    && 
		     tokens[0] != "control" && 
		     tokens[0] != "TOTAL") ){
		     //cout<<tokens[0]<<"\t"<<tokens[5]<<endl;
		     int count = destringify<int>(tokens[5]);

		     if(count>target){

			 if(count>=maxTarget){
			     rg2Fraction[  tokens[0] ] = double(maxTarget)/double(count);
			     cout<<tokens[0]<<"\t"<<count<<"\t"<<double(maxTarget)/double(count)<<endl;
			 }else{
			     cout<<tokens[0]<<"\t"<<count<<"\t"<<1.0<<endl;
			     rg2Fraction[  tokens[0] ] = 1.0;
			 }
		     }
		 }
	 }
	 myFileTally.close();
     }else{
	 cerr << "Unable to open file "<<rgTally<<endl;
	 return 1;
     }



     map<string,BamWriter *> rg2BamWriter;
     
     // if(!isDirectory(bamDirOut)){
     // 	 cerr<<"ERROR: the out directory does not exist"<<endl;
     // 	return 1;
     // }

     BamReader reader;

     if ( !reader.Open(bamfiletopen) ) {
    	cerr << "Could not open input BAM files." << endl;
    	return 1;
     }
    const SamHeader header = reader.GetHeader();
    const RefVector references = reader.GetReferenceData();
    vector<RefData>  refData=reader.GetReferenceData();

    SamReadGroupDictionary 	srgd=header.ReadGroups;
    for(SamReadGroupConstIterator srgci=srgd.ConstBegin();
	srgci<srgd.ConstEnd();
	srgci++){
	//cout<<*srgci<<endl;
	const SamReadGroup rg = (*srgci);
	//cout<<rg.ID<<endl;
	if( rg2Fraction.find(rg.ID) != rg2Fraction.end() ){
	    rg2BamWriter[rg.ID] = new  BamWriter();
	    rg2BamWriter[rg.ID]->Open(bamDirOutPrefix+"."+rg.ID+".bam",header,references); 
	}
	//cout<<bamDirOutPrefix+"."+rg.ID+".bam"<<endl;
    }
    //    return 1;

    //    BamWriter unmapped;

    // cout<<header.ToString()<<endl;
    // return 1;

    // if ( !unmapped.Open(bamDirOutPrefix+".unmapped.bam",header,references) ) {
    // 	cerr << "Could not open output BAM file "<< bamDirOutPrefix+".unmapped.bam" << endl;
    // 	return 1;
    // }

    //    cout<<"reading"<<endl;

    BamAlignment al;
    unsigned int total=0;
    while ( reader.GetNextAlignment(al) ) {


	if(al.HasTag("RG") &&
	   al.IsMapped() ){
	    string rgTag;
	    al.GetTag("RG",rgTag);
	    //cout<<rgTag<<endl;
	    if(rg2BamWriter.find(rgTag) == rg2BamWriter.end()){ //new: ignore completely
	
		
	    }else{
		if( randomProb() <= rg2Fraction[  rgTag ] ){
		    rg2BamWriter[rgTag]->SaveAlignment(al);	 
		    //cout<<"wrote "<<rgTag<<endl;
		}   else{
		    //cout<<"skipped "<<rgTag<<endl;
		}	   
	    }
	}// else{
	//     string rgTag="unknown";	    
	//     //cout<<rgTag<<endl;
	//     if(rg2BamWriter.find(rgTag) == rg2BamWriter.end()){ //new
	// 	cerr<<"Found new RG "<<rgTag<<endl;
	// 	rg2BamWriter[rgTag] = new  BamWriter();
	//  	if ( !rg2BamWriter[rgTag]->Open(bamDirOutPrefix+"."+rgTag+".bam",header,references) ) {
	//  	    cerr     << "Could not open output BAM file "<< bamDirOutPrefix<<"."<<rgTag<<".bam" << endl;
	//  	    return 1;
	//  	}
	// 	rg2BamWriter[rgTag]->SaveAlignment(al);	    	   
	//     }else{
	// 	rg2BamWriter[rgTag]->SaveAlignment(al);	    	   
	//     }

	//     // cerr << "Cannot get RG tag for " << al.Name<<endl;
	//     // return 1;
	// }

	total++;
    } //while al

    reader.Close();
    // writer.Close();
    
    // unmapped.Close();

    map<string,BamWriter *>::iterator rg2BamWriterIt;
    for (rg2BamWriterIt =rg2BamWriter.begin(); 
	 rg2BamWriterIt!=rg2BamWriter.end(); 
	 rg2BamWriterIt++){
	rg2BamWriterIt->second->Close();
    }
    cerr<<"Wrote succesfully "<<total<<" reads"<<endl;


    return 0;
}

Ejemplo n.º 9

Mostrar archivo

Archivo: GenericIndividualSnpCall.cpp Proyecto: homopolymer/PyroTools

/**
 * Collects SNP candidates in the region [leftPosition, rightPosition] of
 * reference chrID.
 *
 * The region is scanned twice. The first scan gathers the mismatch positions
 * reported by GenericBamAlignmentTools::getBamAlignmentMismatches for every
 * alignment accepted by goodAlignment() that has an MD tag. The second scan
 * feeds the alignments passing all read filters to a pileup engine driven by a
 * SimpleSnpCallPileupVisitor, which receives pointers to the position set, the
 * allele list and bamData. Finally, alleles whose depth is below m_minSnpRead
 * or below m_minSnpFrac times the global depth are discarded and the
 * remaining ones are appended to variantCandidates.
 *
 * @param fastaObj           reference handed to the pileup visitor
 * @param bamObj             BAM reader; it is rewound and its region is reset
 * @param chrID              reference id of the region
 * @param leftPosition       left bound passed to SetRegion and the visitor
 * @param rightPosition      right bound passed to SetRegion and the visitor
 * @param variantCandidates  output, accepted alleles are appended
 * @param bamData            passed by address to the pileup visitor
 */
void GenericIndividualSnpCall::simpleSnpCall(string &fastaObj, BamReader &bamObj, int chrID, int leftPosition, int rightPosition, vector<Allele> &variantCandidates, map<int,list<tuple<char,int,int,double>>> &bamData)
{
    set<int> snpPositions;
    vector<Allele> snpAlleles;
    BamAlignment read;

    // First scan: record every mismatch position seen in the region
    bamObj.Rewind();
    bamObj.SetRegion(chrID, leftPosition, chrID, rightPosition);
    while (bamObj.GetNextAlignment(read))
    {
        const bool usable = GenericBamAlignmentTools::goodAlignment(read)
                         && read.HasTag("MD");
        if (!usable)
            continue;

        vector<long> mismatches;
        GenericBamAlignmentTools::getBamAlignmentMismatches(read, mismatches);

        for (const long position : mismatches)
            snpPositions.insert(position);
    }

    // The visitor is created only now, once the position set is filled
    SimpleSnpCallPileupVisitor visitor(&fastaObj, chrID, leftPosition, rightPosition, m_downSample, &snpPositions, &snpAlleles, &bamData);

    PileupEngine pileup;
    pileup.AddVisitor(&visitor);

    // Second scan: pile up the reads that pass all the filters
    bamObj.Rewind();
    bamObj.SetRegion(chrID, leftPosition, chrID, rightPosition);
    while (bamObj.GetNextAlignment(read))
    {
        // same checks, in the same order, as a chain of early exits
        const bool accepted = GenericBamAlignmentTools::goodAlignment(read)
                           && GenericBamAlignmentTools::validMapQuality(read, m_minMapQuality)
                           && GenericBamAlignmentTools::validReadIdentity(read, m_maxMismatchFrac)
                           && GenericBamAlignmentTools::validReadLength(read, m_minReadLength)
                           && read.HasTag("MD");
        if (accepted)
            pileup.AddAlignment(read);
    }
    pileup.Flush();

    // Keep only the alleles with enough supporting reads
    for (const Allele &candidate : snpAlleles)
    {
        const bool tooFewReads    = candidate.m_alleleDepth < m_minSnpRead;
        const bool tooLowFraction = candidate.m_alleleDepth < m_minSnpFrac*candidate.m_globalDepth;

        if (tooFewReads || tooLowFraction)
            continue;

        variantCandidates.push_back(candidate);
    }

}

Ejemplo n.º 10

Mostrar archivo

Archivo: addXACircular.cpp Proyecto: grenaud/schmutzi

int main (int argc, char *argv[]) {

    if( (argc== 1) ||
	(argc== 2 && string(argv[1]) == "-h") ||
	(argc== 2 && string(argv[1]) == "-help") ||
	(argc== 2 && string(argv[1]) == "--help") ){
	cout<<"Usage:"<<argv[0]<<"<options>   [in bam] [ref original length] [extension length]"<<endl;
	cout<<"This program returns the same BAM file except with the XA flag for circular references"<<endl;
	cout<<"Options:"<<endl;
	//cout<<"\t-m\t\t\t\tUse mapped reads only"<<endl;
	return 1;
    }
     

    for(int i=1;i<(argc-3);i++){ //all but the last 3 args

	// if(string(argv[i]) == "-m" ){
	//     onlyMapped=true;
	//     continue;
	// }
      
	cerr<<"Unknown option "<<argv[i] <<" exiting"<<endl;
	return 1;
    }


     string bamfiletopen  = string(          argv[argc-3]);
     int  origLength      = destringify<int>(argv[argc-2]);
     int  extLength       = destringify<int>(argv[argc-1]);
     
     string outputFilename = "/dev/stdout";

     BamReader reader;
     
     if ( !reader.Open(bamfiletopen) ) {
	 cerr << "Could not open input BAM files." << endl;
	 return 1;
     }

    const SamHeader header     = reader.GetHeader();
    const RefVector references = reader.GetReferenceData();

    BamWriter writer;
    if ( !writer.Open(outputFilename, header, references) ) {
	cerr << "Could not open output BAM file" << endl;
	return 1;
    }
    
    BamAlignment al;
    string nameTAG="XA";
    while ( reader.GetNextAlignment(al) ) {
	
	if(al.HasTag(nameTAG)) {
	    cerr << "ERROR: Read "<<al.Name<<" already has XA tags" << endl;
	    return 1;	    
	}
	
	writer.SaveAlignment(al);

		    
     } //while al

     reader.Close();
     writer.Close();

     return 0;
}