Beispiel #1
0
int _walk_through_sam_and_split(samfile_t * fin, samfile_t **foutList)
{
    bam1_t *b = bam_init1();
    int r, count = 0;

    while (0 <= (r = samread(fin, b))) {
      if(b->core.tid > -1){
        samwrite(foutList[b->core.tid], b);
      }else{
        samwrite(foutList[fin->header->n_targets], b);
      }
      count++;
    }
    bam_destroy1(b);

    return r >= -1 ? count : -1 * count;
}
Beispiel #2
0
static void dump_buf(buffer_t *buf, samfile_t *out)
{
	int i;
	for (i = 0; i < buf->n; ++i) {
		elem_t *e = buf->buf + i;
		if (e->score != -1) break;
		if (e->b) {
			samwrite(out, e->b);
			bam_destroy1(e->b);
			e->b = 0;
		}
	}
}
int main(int argc, char* argv[]) {
    samfile_t *ifile = NULL, *ofile = NULL;
    bam1_t *read = bam_init1();
    int keep = 0;
    char *p = NULL;

    //Open input file, either SAM or BAM
    p = strrchr(argv[1], '.');
    if(strcmp(p, ".bam") == 0) {
        ifile = samopen(argv[1], "rb", NULL);
    } else {
        ifile = samopen(argv[1], "r", NULL);
    }

    bam_header_t *head = ifile->header;

    //Open output file
    // ofile = samopen("AND_type.bam", "wb", ifile->header);
    ofile = samopen(argv[2], "wb", ifile->header);


    //Iterate through the lines
    while(samread(ifile, read) > 1) {
        keep = 0;
        //Is the read's mate on the same chromosome/contig?
        if(read->core.tid == read->core.mtid) {
            //Are the mates on opposite strands?
            if(read->core.flag & BAM_FREVERSE && !(read->core.flag & BAM_FMREVERSE)) {
                if(read->core.pos < read->core.mpos) {
                    // Are mates 500 bp or less from the ends?
                    if (read-> core.pos <= 500 && read->core.mpos > head->target_len[read->core.tid] - 500)
                        keep=1;
                }
            } else if(!(read->core.flag & BAM_FREVERSE) && read->core.flag & BAM_FMREVERSE) {
                if(read->core.mpos < read->core.pos) {
                    if (read-> core.mpos <= 500 && read->core.pos > head->target_len[read->core.tid] - 500)
                        keep=1;
                }
            }
        }
        if(keep) samwrite(ofile, read);
    }
    bam_destroy1(read);
    samclose(ifile);
    samclose(ofile);
    return 0;
}
Beispiel #4
0
void filter_by_id(const char* fn, hash_table* T)
{
    fprintf(stderr, "filtering ... \n");

    samfile_t* fin = samopen(fn, "rb", NULL);
    if (fin == NULL) {
        fprintf(stderr, "can't open bam file %s\n", fn);
        exit(1);
    }

    samfile_t* fout = samopen("-", "w", (void*)fin->header);
    if (fout == NULL) {
        fprintf(stderr, "can't open stdout, for some reason.\n");
        exit(1);
    }

    fputs(fin->header->text, stdout);

    bam1_t* b = bam_init1();
    uint32_t n = 0;

    while (samread(fin, b) >= 0) {
        if (++n % 1000000 == 0) {
            fprintf(stderr, "\t%d reads\n", n);
        }

        if (get_hash_table(T, bam1_qname(b), b->core.l_qname) == 1) {
            samwrite(fout, b);
        }
    }

    bam_destroy1(b);
    samclose(fout);
    samclose(fin);

    fprintf(stderr, "done.\n");
}
Beispiel #5
0
int
tmap_seqs_io_sff2sam_main(int argc, char *argv[])
{
  int c, help = 0;
  tmap_seqs_io_t *io_in = NULL;
  tmap_seqs_t *seqs = NULL;
  char **sam_rg = NULL;
  int32_t sam_rg_num = 0;
  int bidirectional = 0, sam_flowspace_tags = 0;
  int out_type = 0;
  tmap_sam_io_t *io_out = NULL;
  bam_header_t *header = NULL; // BAM Header
  int32_t i;

  /*
  uint8_t *key_seq = NULL;
  int key_seq_len = 0;
  */

  while((c = getopt(argc, argv, "DGR:Yvh")) >= 0) {
      switch(c) {
        case 'D': bidirectional = 1; break;
        case 'G': break;
        case 'R':
                  sam_rg = tmap_realloc(sam_rg, (1+sam_rg_num) * sizeof(char*), "sam_rg");
                  sam_rg[sam_rg_num] = tmap_strdup(optarg);
                  sam_rg_num++;
                  break;
        case 'Y': sam_flowspace_tags = 1; break;
        case 'v': tmap_progress_set_verbosity(1); break;
        case 'h': help = 1; break;
        default: return 1;
      }
  }
  if(1 != argc - optind || 1 == help) {
      tmap_file_fprintf(tmap_file_stderr, "Usage: %s %s [-R -Y -v -h] <in.sff>\n", PACKAGE, argv[0]);
      return 1; 
  }

  // input
  io_in = tmap_seqs_io_init(&argv[optind], 1, TMAP_SEQ_TYPE_SFF, TMAP_FILE_NO_COMPRESSION, 0l, 0l);

  // BAM Header
  header = tmap_seqs_io_to_bam_header(NULL, io_in, sam_rg, sam_rg_num, argc, argv);

  // open the output file
  switch(out_type) {
    case 0: // SAM
      io_out = tmap_sam_io_init2("-", "wh", header);
      break;
    case 1:
      io_out = tmap_sam_io_init2("-", "wb", header);
      break;
    case 2:
      io_out = tmap_sam_io_init2("-", "wbu", header);
      break;
    default:
      tmap_bug();
  }

  // destroy the BAM Header
  bam_header_destroy(header);
  header = NULL;

  seqs = tmap_seqs_init(TMAP_SEQ_TYPE_SFF);
  while(0 < tmap_seqs_io_read(io_in, seqs, io_out->fp->header->header)) {
      bam1_t *b = NULL;
      tmap_seq_t *seq = seqs->seqs[0];
      b = tmap_sam_convert_unmapped(seq, sam_flowspace_tags, bidirectional, NULL,
                                    0, 0, 0,
                                    0, 0, 0,
                                    "\tlq:i:%d\trq:i:%d\tla:i:%d\trq:i:%d",
                                    seq->data.sff->rheader->clip_qual_left,
                                    seq->data.sff->rheader->clip_qual_right,
                                    seq->data.sff->rheader->clip_adapter_left,
                                    seq->data.sff->rheader->clip_adapter_right);
      if(samwrite(io_out->fp, b) <= 0) {
          tmap_error("Error writing the SAM file", Exit, WriteFileError);
      }
      bam_destroy1(b); 
      tmap_seqs_destroy(seqs);
      seqs = tmap_seqs_init(TMAP_SEQ_TYPE_SFF);
  }
  tmap_seqs_destroy(seqs);

  // free memory
  tmap_seqs_io_destroy(io_in);
  tmap_sam_io_destroy(io_out);
  for(i=0;i<sam_rg_num;i++) {
      free(sam_rg[i]);
  }
  free(sam_rg);

  return 0;
}
Beispiel #6
0
BamX::BamX(pars & Params1)	// optional constructor
{
    // parameters
    Params=Params1;
    Nread=0;
    Npair=0;
    Nproper=0;
    Nout=0;
    LFlow=INT_MIN;
    LFhigh=INT_MAX;
    region.limit=false;
    IlluminizeBam=0;

    outFragTailBam=false;
    outInterChromBam=false;
    outUniqueMultipleBam=false;
    outUniquePartialBam=false;
    outUniqueUnmappedBam=false;
    outAllPairsBam=false;
    outReadPairPosBam=false;
    
    //output file
	//samfile_t *fp;
    bam_header_t *bam_header;
    
    string s = Params.getInput();
    BamUtil bam1(s);
    Bam = bam1;

    string filename=extractfilename(s);
    
    // parameters
    string fragPosFile = Params.getString("ReadPairPosFile");
    string r = Params.getString("ChromRegion");
    int maxReads = Params.getInt("MaxReads");
    Qmin = Params.getInt("Qmin");
    LRmin = Params.getInt("MinReadLength");
    maxmismatchPC=Params.getDouble("FractionMaxMisMatches");
    FragLengthWindow=Params.getInt("FragmentLengthWindow");
    int cmd_MateMode=Params.getInt("ReadPairSenseConfig");
    string ReferenceFastaFile=Params.getString("ReferenceFastaFile");
    FragmentTailPercent=Params.getDouble("FragmentTailPercent");
    IlluminizeBam=Params.getInt("Illuminize")>0;
    outputDirectory=Params.getString("OutputDirectory");
    int minLR=Params.getInt("MinReadLength"); 
    int SplitBracketMin=Params.getInt("SplitBracketMin"); 
    int SplitBaseQmin=Params.getInt("SplitBaseQmin"); 
    
    string StatFile=Params.getString("StatFile");
    if (StatFile.size()>0) {
        hists H1(StatFile);
        hist HLF=H1.h["LF"];
        hist HLR=H1.h["LR"];
        Params.setHist("LF",HLF);
        Params.setHist("LR",HLR);        
        H1.h.clear();  // free some memory 
        if (FragmentTailPercent>0) {
            LFlow=int(HLF.p2xTrim(FragmentTailPercent/100.));   
            LFhigh=int(HLF.p2xTrim(1-FragmentTailPercent/100.));   
        }
    }
    
    int dbg = Params.getInt("Dbg");
    time(&tprev);
    
    if (ReferenceFastaFile.size()>0) {
        FastaObj RF1(ReferenceFastaFile, "");
        Reference=RF1;
        RF1.seq.clear();  // free some memory 
    }
    
    bam_header= Bam.fp->header;
    string bamheadertext = bam_header->text;
    ReadGroup = extractBamTag(bamheadertext,"@RG");
    
    outAllPairsBam=(r.size()>0);
    if (!outAllPairsBam) { 
        outFragTailBam=true; //FragmentTailPercent>=0;
        outInterChromBam=true;
        outUniqueMultipleBam=true;
        outUniquePartialBam=true;
        outUniqueUnmappedBam=true;
    }
    // output Bams
    outputBam.clear();
    
    /*
    // test BamHeaderContainer
    vector<BamHeaderContainer> x;
    string sv=SpannerVersion;    
    string q="@PG\tID:FragmentTail\tPN:SpannerX\tVN"+sv+"\tCL:"+Params.getCmdLine();
    while (true) {
        string outfile=outputDirectory+"/"+filename+".fragtail.bam";
        q=q+"\n@PG\tID:FragmentTail\tPN:SpannerX\tVN"+sv+"\tCL:"+Params.getCmdLine();
        BamHeaderContainer x1( bam_header, q); 
        x.push_back(x1);
        bam_header_t* h1=x[x.size()-1].header();
        cout<< h1->text << endl;
    }
    cout << x.size() << endl;
    */
    
    samfile_t *fpFT=0;
    samfile_t *fpIC=0;
    samfile_t *fpUM=0;
    samfile_t *fpUP=0;
    samfile_t *fpUZ=0;
    samfile_t *fpAP=0;
    samfile_t *fpWP=0;
    
    //region
    if (r.size()>0) {
        int r1,r2,r3;
        C_region r0(r); 
        region=r0;
        string bamRegion=region.region;
        size_t k=bamRegion.find("chr");
        if (k!=string::npos) {
            bamRegion=bamRegion.substr(3);
        }

        if ( bam_parse_region(bam_header, bamRegion.c_str(), &r1, &r2, &r3)==0) {
            region.limit=true;
            region.anchor=r1;
            region.start=r2;
            region.end=r3;
        } else {
            cerr << "region not found\t" << r << endl;
            exit(111);
        }
        
    }
    
    
    //fragPosFile
    if (fragPosFile.size()>0) {
        
        FragmentPosFileObj fp(fragPosFile);
        if (fp.fragmentPosList.size()>0) {
            FragPos=fp;
        } else {
            cerr << "Read Pair Pos file not found\t" <<  fragPosFile << endl;
            exit(112);
        }
        outFragTailBam=false; 
        outInterChromBam=false;
        outUniqueMultipleBam=false;
        outUniquePartialBam=false;
        outUniqueUnmappedBam=false;
        outReadPairPosBam=true;
        
    }

    
    if (outAllPairsBam) {
        string outfile=outputDirectory+"/"+filename+"."+r+".bam";
        string sv=SpannerVersion;
        string q="@PG\tID:Region\tPN:SpannerX\tVN"+sv+"\tCL:"+Params.getCmdLine();
        outputBam["AP"]=BamHeaderContainer(bam_header,q); 
        bam_header_t* h1=outputBam["AP"].header();
        if ((fpAP = samopen(outfile.c_str(), "wb", h1)) == 0) {
            fprintf(stderr, "samopen: Fail to open output BAM file %s\n", filename.c_str());
            exit(160);
        }
    }

    
    if (outFragTailBam) {
        string outfile=outputDirectory+"/"+filename+".fragtail.bam";
        string sv=SpannerVersion;
        string q="@PG\tID:FragmentTail\tPN:SpannerX\tVN"+sv+"\tCL:"+Params.getCmdLine();
        outputBam["FT"]=BamHeaderContainer(bam_header,q); 
        bam_header_t* h1=outputBam["FT"].header();
        if ((fpFT = samopen(outfile.c_str(), "wb", h1)) == 0) {
            fprintf(stderr, "samopen: Fail to open output BAM file %s\n", filename.c_str());
            exit(161);
        }
    }
     
    if (outInterChromBam) {
        string outfile=outputDirectory+"/"+filename+".interchrom.bam";
        string sv=SpannerVersion;
        string q="@PG\tID:InterChromPairs\tPN:SpannerX\tVN"+sv+"\tCL:"+Params.getCmdLine();
        outputBam["IC"]=BamHeaderContainer(bam_header,q);   
        bam_header_t* h1=outputBam["IC"].header();
        if ((fpIC = samopen(outfile.c_str(), "wb", h1)) == 0) {
            fprintf(stderr, "samopen: Fail to open output BAM file %s\n", filename.c_str());
            exit(162);
        }
    }
    
    if (outUniqueMultipleBam) {
        string outfile=outputDirectory+"/"+filename+".uMult.bam";
        string sv=SpannerVersion;
        string q="@PG\tID:uniqMultiplyMappedPairs\tPN:SpannerX\tVN"+sv+"\tCL:"+Params.getCmdLine();
        outputBam["UM"]=BamHeaderContainer(bam_header,q); 
        bam_header_t* h1=outputBam["IUM"].header();
        if ((fpUM = samopen(outfile.c_str(), "wb", h1)) == 0) {
            fprintf(stderr, "samopen: Fail to open output BAM file %s\n", filename.c_str());
            exit(163);
        }
    }
    
    if (outUniquePartialBam) {        
        string outfile=outputDirectory+"/"+filename+".uPart.bam";
        string sv=SpannerVersion;
        string q="@PG\tID:uniqPartiallyMappedPairs\tPN:SpannerX\tVN"+sv+"\tCL:"+Params.getCmdLine();
        outputBam["UP"]=BamHeaderContainer(bam_header,q);  
        bam_header_t* h1=outputBam["UP"].header();
        if ((fpUP = samopen(outfile.c_str(), "wb", h1)) == 0) {
            fprintf(stderr, "samopen: Fail to open output BAM file %s\n", filename.c_str());
            exit(164);
        }
    }

    if (outUniqueUnmappedBam) {        
        string outfile=outputDirectory+"/"+filename+".uUnmapped.bam";
        string sv=SpannerVersion;
        string q="@PG\tID:uniqUnMappedPairs\tPN:SpannerX\tVN"+sv+"\tCL:"+Params.getCmdLine();
        outputBam["UZ"]=BamHeaderContainer(bam_header,q); 
        bam_header_t* h1=outputBam["UZ"].header();
        if ((fpUZ = samopen(outfile.c_str(), "wb", h1)) == 0) {
            fprintf(stderr, "samopen: Fail to open output BAM file %s\n", filename.c_str());
            exit(165);
        }

    }

    if (outReadPairPosBam) {        
        string outfile=outputDirectory+"/"+filename+".weirdpairs.bam";
        string sv=SpannerVersion;
        string q="@PG\tID:weirdpairs\tPN:SpannerX\tVN"+sv+"\tCL:"+Params.getCmdLine();
        outputBam["WP"]=BamHeaderContainer(bam_header,q); 
        bam_header_t* h1=outputBam["WP"].header();
        if ((fpWP = samopen(outfile.c_str(), "wb", h1)) == 0) {
            fprintf(stderr, "samopen: Fail to open output BAM file %s\n", filename.c_str());
            exit(165);
        }
        
    }

    
    cout << ReadGroup << endl << endl;
    
    //extractMateMode();
    
    if (cmd_MateMode>=0)   MateMode=cmd_MateMode;          
     
    BamContainerPair bampair;
    
    bool more = true;
    while (more)
    {
        bampair=Bam.getNextBamPair();
        // skip if neither end within region
        more=(bampair.BamEnd.size()>1);
        
        Npair++;
        if (Npair>=maxReads) break; 

        //
        if ( (dbg!=0)&&(elapsedtime()>float(dbg))) {
			time(&tprev);
			cout << " pairs:" << Npair << "\toutput:" << Nout;
			cout << "\tchr:" << bampair.BamEnd[0].b.core.tid+1;
            cout << "\tpos:" << bampair.BamEnd[0].b.core.pos;
            cout << endl;			
		}  
        
        if (!more) continue; 
        if (region.limit) {
            bool overlap = false;
            for (int e=0; e<=1; e++) {                 
                int a1=bampair.BamEnd[e].b.core.tid;
                int p1=bampair.BamEnd[e].b.core.pos;
                int p2=p1+bampair.BamEnd[e].len;
                overlap=region.overlap(a1,p1,p2);
                if (overlap) break; 
            }        
            if (!overlap) continue;
        }
    
        
        bampair.Illuminize(IlluminizeBam);  
        bampair.calcFragmentLengths();
        more=(bampair.BamEnd[1].packeddata.size()>1);
        //if (bampair.BamEnd[0].b.core.tid==bampair.BamEnd[1].b.core.tid) 
        //    cout<< bampair << endl;
        
        bool bothmap = ((bampair.BamEnd[0].b.core.flag&BAM_FUNMAP)==0)&&((bampair.BamEnd[0].b.core.flag&BAM_FMUNMAP)==0);
            
        
        if (outAllPairsBam) {
            Nout++;
            int s1=samwrite(fpAP, &(bampair.BamEnd[0].b));
            int s2=samwrite(fpAP, &(bampair.BamEnd[1].b));
            if ((s1*s2)>0) {
                continue;
            } else {
                cerr << "bad write to pairs.bam" << endl;
                exit(150);
            }
        }

        
        if (outReadPairPosBam) {
            int ichr1=bampair.BamEnd[0].b.core.tid+1;
            int istd1=bampair.BamEnd[0].sense=='+'? 0: 1;
            int ista1=bampair.BamEnd[0].b.core.pos+1;
            int iq1=bampair.BamEnd[0].q;
            int ichr2=bampair.BamEnd[1].b.core.tid+1;
            int istd2=bampair.BamEnd[1].sense=='+'? 0: 1;
            int ista2=bampair.BamEnd[1].b.core.pos+1;
            int iq2=bampair.BamEnd[1].q;
            
            FragmentPosObj  fp1(0,ichr1,istd1,ista1,0,ichr2,istd2,ista2,0,iq1, iq2,0);
            
            /*
             if ((fp1.chr1==10)&&(fp1.start1>=89687801)&&(fp1.end1<=89700722)) {
                cout << "read "<< fp1 << endl;                
            }
            */
            if (FragPos.find(fp1)) {
                Nout++;
                int s1=samwrite(fpWP, &(bampair.BamEnd[0].b));
                int s2=samwrite(fpWP, &(bampair.BamEnd[1].b));
                if ((s1*s2)>0) {
                    continue;
                } else {
                    cerr << "bad write to weirdpairs.bam" << endl;
                    exit(156);
                }
            }
        }        
        bool ok[2];
        for (int e=0; e<2; e++) {
            uint8_t*  bq=bam1_qual(&(bampair.BamEnd[e].b));
            int LR=bampair.BamEnd[0].b.core.l_qseq;
            double bok=0;
            for (int ib=0; ib<LR; ib++) {
                if (bq[ib]>SplitBaseQmin) {
                    bok++;
                }
            }
            ok[e]=(bok>LRmin);
        }
        
        if (! (ok[0]&ok[1]) )
            continue;
        
        if ( (outFragTailBam) & ((bampair.BamEnd[0].q>=Qmin)|(bampair.BamEnd[1].q>=Qmin)) ) {            
            bool FT=(bampair.FragmentLength>LFhigh)|((bampair.FragmentLength<LFlow)&(bampair.FragmentLength>INT_MIN))&bothmap;
            if (FT && (fpFT!=0)) {
                Nout++;
                int s1=samwrite(fpFT, &(bampair.BamEnd[0].b));
                int s2=samwrite(fpFT, &(bampair.BamEnd[1].b));
                //if (outputBam["FT"].write(&(bampair.BamEnd[0].b),&(bampair.BamEnd[1].b))) {
                if ((s1*s2)>0) {
                    continue;
                } else {
                    cerr << "bad write to fragtail.bam" << endl;
                    exit(151);
                }
            }
        }
        
        if ((outInterChromBam) & ((bampair.BamEnd[0].q>=Qmin)&(bampair.BamEnd[1].q>=Qmin))) { 
            bool IC=(bampair.BamEnd[0].b.core.tid!=bampair.BamEnd[1].b.core.tid)&&bothmap;
            if (IC && (fpIC!=0)) {
                Nout++;
                int s1=samwrite(fpIC, &(bampair.BamEnd[0].b));
                int s2=samwrite(fpIC, &(bampair.BamEnd[1].b));
                if ((s1*s2)>0) {
                    continue;
                } else {
                    cerr << "bad write to interchrom.bam" << endl;
                    exit(152);
                }
            }
        }
        if ((outUniqueMultipleBam) & ((bampair.BamEnd[0].q>=Qmin)|(bampair.BamEnd[1].q>=Qmin))){
            int im=bampair.BamEnd[0].nmap>1? 0: 1;
            int iu=bampair.BamEnd[0].q>=Qmin? 0: 1;
            bool UM=(bampair.BamEnd[iu].nmap>1)&&(iu!=im)&&bothmap;            
            if (UM && (fpUM!=0)) {
                Nout++;
                int s1=samwrite(fpUM, &(bampair.BamEnd[0].b));
                int s2=samwrite(fpUM, &(bampair.BamEnd[1].b));
                if ((s1*s2)>0) {
                    continue;
                } else {
                    cerr << "bad write to uMult.bam" << endl;
                    exit(153);
                }
            }
        }
        if ( (outUniquePartialBam) && ((bampair.BamEnd[0].q>=Qmin)|(bampair.BamEnd[1].q>=Qmin)) && bothmap) {            
            int c0=bampair.BamEnd[0].clip[0]+bampair.BamEnd[0].clip[1];
            int LR=bampair.BamEnd[0].b.core.l_qseq;
            bool split0=((LR-c0)>SplitBracketMin)&(c0>SplitBracketMin);
            int ib0=0;
            if ((split0)&(bampair.BamEnd[0].clip[0]>SplitBracketMin)) {
                ib0=bampair.BamEnd[0].clip[0];
            } else if ((split0)&(bampair.BamEnd[0].clip[1]>SplitBracketMin) ) {
                ib0=LR-bampair.BamEnd[0].clip[1];
            }
            split0=split0&(ib0>0);
            if (split0) {
                uint8_t*  bq=bam1_qual(&(bampair.BamEnd[0].b));
                for (int ib=(ib0-SplitBracketMin); ib<(ib0+SplitBracketMin); ib++) {
                    if (bq[ib]<SplitBaseQmin) {
                        split0=false;
                        break;
                    }
                }
            }
            
            int c1=bampair.BamEnd[1].clip[0]+bampair.BamEnd[1].clip[1];
            LR=bampair.BamEnd[1].b.core.l_qseq;
            bool split1=((LR-c0)>SplitBracketMin)&(c1>SplitBracketMin);;
            int ib1=0;
            if ((split1)&(bampair.BamEnd[1].clip[0]>SplitBracketMin)) {
                ib1=bampair.BamEnd[1].clip[0];
            } else if ((split1)&(bampair.BamEnd[1].clip[1]>SplitBracketMin) ) {
                ib1=LR-bampair.BamEnd[1].clip[1];
            }
            split1=split1&(ib1>0);
            if (split1) {
                uint8_t*  bq=bam1_qual(&(bampair.BamEnd[1].b));
                for (int ib=(ib1-SplitBracketMin); ib<(ib1+SplitBracketMin); ib++) {
                    if (bq[ib]<SplitBaseQmin) {
                        split1=false;
                        break;
                    }
                }
            }
            bool UP=(split0|split1)&((c1+c0)>minLR);
            if (UP && (fpUP!=0)) {
                Nout++;
                int s1=samwrite(fpUP, &(bampair.BamEnd[0].b));
                int s2=samwrite(fpUP, &(bampair.BamEnd[1].b));
                if ((s1*s2)>0) {
                    continue;
                } else {
                    cerr << "bad write to uPart.bam" << endl;
                    exit(154);
                }
            }
        }
        if ( (outUniqueUnmappedBam) & ((bampair.BamEnd[0].q>=Qmin)|(bampair.BamEnd[1].q>=Qmin)) ) {
            bool z0=((bampair.BamEnd[0].b.core.flag&BAM_FUNMAP)>0);
            bool z1=((bampair.BamEnd[1].b.core.flag&BAM_FUNMAP)>0);  
            
            
            uint8_t*  bq=bam1_qual(&(bampair.BamEnd[0].b));
            for (int nb,ib=0; ib<bampair.BamEnd[0].b.core.l_qseq; ib++) {
                if (bq[ib]<SplitBaseQmin) {
                    nb++;
                }
            }

            
            bool UZ=(z0|z1)&(!(z1&z0));
            if (UZ && (fpUZ!=0)) {
                Nout++;
                int s1=samwrite(fpUZ, &(bampair.BamEnd[0].b));
                int s2=samwrite(fpUZ, &(bampair.BamEnd[1].b));
                if ((s1*s2)>0) {
                    continue;
                } else {
                    cerr << "bad write to uUnmapped.bam" << endl;
                    exit(155);
                }
            }
        }
        
        
        //cout<< bampair.Orientation << "\t"<< bampair.FragmentLength << "\t" <<bampair.BamEnd[1].b.core.pos << endl;
        
    }
    
    if (outReadPairPosBam) {
        samclose(fpWP);
    } else {        
        if (outAllPairsBam) {
            samclose(fpAP);
        } else {       
            samclose(fpFT);
            samclose(fpIC);    
            samclose(fpUP);    
            samclose(fpUM);
            samclose(fpUZ);
        }
    }
    
    /*
     for (ioutputBam=outputBam.begin(); ioutputBam!=outputBam.end(); ioutputBam++) {
        (*ioutputBam).second.close();
    }
     
    if (FragmentTailPercent>0) 
        outputBam["FT"].close();
    */
    
    samclose(Bam.fp);
    
    
}
Beispiel #7
0
int main () {
    bam_header_t sheader;
    
    sheader.n_targets = 2;
    char testSeq1[10] = "HELLOSEQ";
    char testSeq2[10] = "HELLOSE2";
    sheader.target_name = (char**) malloc(sizeof(char*)*2);
    sheader.target_name[0] = testSeq1;
    sheader.target_name[1] = testSeq2;
    sheader.target_len = (uint32_t*) malloc(sizeof(uint32_t)*2);
    sheader.target_len[0] = 10;
    sheader.target_len[1] = 10;
    
    sheader.hash = NULL;
    sheader.hash = NULL;
    
    sheader.l_text=0;
    sheader.text = (char*) malloc(sizeof(char)*5);
    sheader.text = "@TEST";
    
    samfile_t * sptr = samopen("ASD","wh",&sheader);
    
    bam1_t salignment;
    salignment.core.tid = 1;
    salignment.core.pos = 2456;
    salignment.core.bin = bam_reg2bin(0,0);
    salignment.core.qual = 255; //quality
    salignment.core.l_qname = 6; //lenght of the query name
    salignment.core.flag = 0;
    salignment.core.n_cigar = 1;
    salignment.core.l_qseq = 8; //length of the read
    
    salignment.core.mtid = 0;
    salignment.core.mpos = 1357;
    salignment.core.isize = 1;
        
    salignment.l_aux = 0;
    salignment.data_len = 0;
    salignment.m_data = 255;
    salignment.data = malloc(sizeof(uint8_t)*255);
    
    //Name
    SAMIUint8ConcatString(salignment.data,&(salignment.data_len),"QNAME",6);
    //CIGAR
    SAMIUint8ConcatUint32(salignment.data,&(salignment.data_len),128);
    //Read
    SAMIUint8ConcatUint8(salignment.data,&(salignment.data_len),17);
    SAMIUint8ConcatUint8(salignment.data,&(salignment.data_len),17);
    SAMIUint8ConcatUint8(salignment.data,&(salignment.data_len),34);
    SAMIUint8ConcatUint8(salignment.data,&(salignment.data_len),34);
    //Quality
    SAMIUint8ConcatUint8(salignment.data,&(salignment.data_len),0xff);
    SAMIUint8ConcatUint8(salignment.data,&(salignment.data_len),0xff);
    SAMIUint8ConcatUint8(salignment.data,&(salignment.data_len),0xff);
    SAMIUint8ConcatUint8(salignment.data,&(salignment.data_len),0xff);
    SAMIUint8ConcatUint8(salignment.data,&(salignment.data_len),0xff);
    SAMIUint8ConcatUint8(salignment.data,&(salignment.data_len),0xff);
    SAMIUint8ConcatUint8(salignment.data,&(salignment.data_len),0xff);
    SAMIUint8ConcatUint8(salignment.data,&(salignment.data_len),0xff);
    
    unsigned int auxStart = salignment.data_len;
    SAMIUint8ConcatString(salignment.data,&(salignment.data_len),"XAZTESTING",11);
    salignment.l_aux += salignment.data_len - auxStart; 
    //salignment.data = "QNAME\0\0\0\0\0AAAACCCC!!!!!!!!ASD";
    
    
    unsigned int * asd = bam1_cigar(&salignment);
    
    printf("%u\n",(uint32_t) (*asd));
    
    samwrite(sptr,&salignment);
    
    salignment.core.tid = 0;
    salignment.core.pos = 1357;
    samwrite(sptr,&salignment);
    
    
    samclose(sptr);

    return 0;
}
Beispiel #8
0
int bsstrand_func(bam1_t *b, const samfile_t *in, samfile_t *out, void *data) {

	bsstrand_data_t *d = (bsstrand_data_t*)data;
	bsstrand_conf_t *conf = d->conf;
	const bam1_core_t *c = &b->core;

	if (c->flag & BAM_FUNMAP){
		if (out) samwrite(out, b);
		d->n_unmapped++;
		return 0;
	}
	
	fetch_refseq(d->rs, in->header->target_name[c->tid], c->pos, c->pos+1);
	uint32_t rpos=c->pos+1, qpos=0;
	int i, nC2T = 0, nG2A = 0;
	uint32_t j;
	char rbase, qbase;

	for (i=0; i<c->n_cigar; ++i) {
		uint32_t op = bam_cigar_op(bam1_cigar(b)[i]);
		uint32_t oplen = bam_cigar_oplen(bam1_cigar(b)[i]);
		switch(op) {
		case BAM_CMATCH:
			for(j=0; j<oplen; ++j) {
				rbase = toupper(getbase_refseq(d->rs, rpos+j));
				qbase = bscall(bam1_seq(b), qpos+j);
				if (rbase == 'C' && qbase == 'T') nC2T += 1;
				if (rbase == 'G' && qbase == 'A') nG2A += 1;
				/* printf("%c vs %c\n", toupper(rbase), qbase); */
			}
			rpos += oplen;
			qpos += oplen;
			break;
		case BAM_CINS:
			qpos += oplen;
			break;
		case BAM_CDEL:
			rpos += oplen;
			break;
		case BAM_CSOFT_CLIP:
			qpos += oplen;
			break;
		default:
			fprintf(stderr, "Unknown cigar, %u\n", op);
			abort();
		}
	}

	char key[2] = {'Z','S'};
	unsigned char *bsstrand = bam_aux_get(b, key);
	if (bsstrand) {
		bsstrand++;
		double s = similarity(nG2A, nC2T);
		if (nG2A > 1 && nC2T > 1 && s > 0.5) {
			if (conf->output_read || conf->output_all_read)
				printf("F\t%s\t%d\t%d\t%d\t%s\t%s\t%1.2f\n", in->header->target_name[c->tid], c->pos, nC2T, nG2A, bam1_qname(b), bsstrand, s);
			bam_aux_append(b, "OS", 'A', 1, bsstrand);
			bsstrand[0] = '?';
			d->n_fail++;
		} else if (*bsstrand == '+' && nG2A > nC2T + 2) {
			if (conf->output_read || conf->output_all_read)
				printf("W2C\t%s\t%d\t%d\t%d\t%s\t%s\t%1.2f\n", in->header->target_name[c->tid], c->pos, nC2T, nG2A, bam1_qname(b), bsstrand, s);
			bam_aux_append(b, "OS", 'A', 1, bsstrand);
			bsstrand[0] = '-';
			d->n_corr++;
		} else if (*bsstrand == '-' && nC2T > nG2A + 2) {
			if (conf->output_read || conf->output_all_read)
				printf("C2W\t%s\t%d\t%d\t%d\t%s\t%s\t%1.2f\n", in->header->target_name[c->tid], c->pos, nC2T, nG2A, bam1_qname(b), bsstrand, s);
			bam_aux_append(b, "OS", 'A', 1, bsstrand);
			bsstrand[0] = '+';
			d->n_corr++;
		} else if (conf->output_all_read) {
			printf("N\t%s\t%d\t%d\t%d\t%s\t%s\t%1.2f\n", in->header->target_name[c->tid], c->pos, nC2T, nG2A, bam1_qname(b), bsstrand, s);
		}
	} else if (!(c->flag & BAM_FUNMAP) && conf->infer_bsstrand) {
		char bss[3];
		if (similarity(nG2A, nC2T) < 0.5) {
			strcpy(bss, "??");
		} else if (nC2T > nG2A) {
			strcpy(bss, c->flag & BAM_FREVERSE ? "+-" : "++");
		} else {
			strcpy(bss, c->flag & BAM_FREVERSE ? "-+" : "--");
		}
		bam_aux_append(b, "ZS", 'Z', 3, (uint8_t*) bss);
	}

	
	if (out) samwrite(out, b);
	d->n_mapped++;

	return 0;
}
Beispiel #9
0
void filter_by_id(const char* fn, hash_table* T)
{
    fprintf(stderr, "filtering ... \n");

    samfile_t* fin = samopen(fn, "rb", NULL);
    if (fin == NULL) {
        fprintf(stderr, "can't open bam file %s\n", fn);
        exit(1);
    }

    samfile_t* fout = samopen("-", "w", (void*)fin->header);
    if (fout == NULL) {
        fprintf(stderr, "can't open stdout, for some reason.\n");
        exit(1);
    }

    fputs(fin->header->text, stdout);

    bam1_t* b = bam_init1();
    uint32_t n = 0;

    char* qname = NULL;
    size_t qname_size = 0;

    while (samread(fin, b) >= 0) {
        if (++n % 1000000 == 0) {
            fprintf(stderr, "\t%d reads\n", n);
        }


        if (qname_size < b->core.l_qname + 3) {
            qname_size = b->core.l_qname + 3;
            qname = realloc(qname, qname_size);
        }

        memcpy(qname, bam1_qname(b), b->core.l_qname);

        if (b->core.flag & BAM_FREAD2) {
            qname[b->core.l_qname]     = '/';
            qname[b->core.l_qname + 1] = '2';
            qname[b->core.l_qname + 2] = '\0';
        }
        else {
            qname[b->core.l_qname]     = '/';
            qname[b->core.l_qname + 1] = '1';
            qname[b->core.l_qname + 2] = '\0';
        }

        if (get_hash_table(T, qname, b->core.l_qname + 2) == 1) {
            samwrite(fout, b);
        }
    }

    free(qname);

    bam_destroy1(b);
    samclose(fout);
    samclose(fin);

    fprintf(stderr, "done.\n");
}