/*
 * Copy every record of `fin` into one of the handles in `foutList`.
 *
 * A record whose target id is greater than -1 goes to foutList[tid]; any
 * other record goes to the slot at index fin->header->n_targets.
 *
 * Returns the number of records written when the last samread() result is
 * >= -1, and the negated count otherwise.
 */
int _walk_through_sam_and_split(samfile_t * fin, samfile_t **foutList)
{
    bam1_t *rec = bam_init1();
    int written = 0;
    int status;

    for (;;) {
        samfile_t *dest;

        status = samread(fin, rec);
        if (status < 0)
            break;

        dest = (rec->core.tid > -1)
                   ? foutList[rec->core.tid]
                   : foutList[fin->header->n_targets];
        samwrite(dest, rec);
        ++written;
    }

    bam_destroy1(rec);

    if (status >= -1)
        return written;
    return -written;
}
/*
 * Walk the buffer from the front and stop at the first element whose score
 * is not -1. Every element visited before that which still holds a record
 * has the record written to `out`, destroyed, and its pointer cleared.
 */
static void dump_buf(buffer_t *buf, samfile_t *out)
{
    int idx = 0;

    while (idx < buf->n) {
        elem_t *slot = &buf->buf[idx];
        ++idx;

        if (slot->score != -1)
            return;
        if (!slot->b)
            continue;

        samwrite(out, slot->b);
        bam_destroy1(slot->b);
        slot->b = 0;
    }
}
int main(int argc, char* argv[]) { samfile_t *ifile = NULL, *ofile = NULL; bam1_t *read = bam_init1(); int keep = 0; char *p = NULL; //Open input file, either SAM or BAM p = strrchr(argv[1], '.'); if(strcmp(p, ".bam") == 0) { ifile = samopen(argv[1], "rb", NULL); } else { ifile = samopen(argv[1], "r", NULL); } bam_header_t *head = ifile->header; //Open output file // ofile = samopen("AND_type.bam", "wb", ifile->header); ofile = samopen(argv[2], "wb", ifile->header); //Iterate through the lines while(samread(ifile, read) > 1) { keep = 0; //Is the read's mate on the same chromosome/contig? if(read->core.tid == read->core.mtid) { //Are the mates on opposite strands? if(read->core.flag & BAM_FREVERSE && !(read->core.flag & BAM_FMREVERSE)) { if(read->core.pos < read->core.mpos) { // Are mates 500 bp or less from the ends? if (read-> core.pos <= 500 && read->core.mpos > head->target_len[read->core.tid] - 500) keep=1; } } else if(!(read->core.flag & BAM_FREVERSE) && read->core.flag & BAM_FMREVERSE) { if(read->core.mpos < read->core.pos) { if (read-> core.mpos <= 500 && read->core.pos > head->target_len[read->core.tid] - 500) keep=1; } } } if(keep) samwrite(ofile, read); } bam_destroy1(read); samclose(ifile); samclose(ofile); return 0; }
/*
 * Stream the BAM file `fn` and write to stdout (as SAM text) every record
 * for which get_hash_table() returns 1 when given the record's query name
 * and core.l_qname as the key length.
 *
 * The input header text is copied to stdout before any record. Progress is
 * reported on stderr every million reads. Exits with status 1 if the input,
 * stdout, or the record buffer cannot be set up.
 */
void filter_by_id(const char* fn, hash_table* T)
{
    fprintf(stderr, "filtering ... \n");

    samfile_t* fin = samopen(fn, "rb", NULL);
    if (fin == NULL) {
        fprintf(stderr, "can't open bam file %s\n", fn);
        exit(1);
    }

    samfile_t* fout = samopen("-", "w", (void*)fin->header);
    if (fout == NULL) {
        fprintf(stderr, "can't open stdout, for some reason.\n");
        exit(1);
    }

    /* A header without text must not be handed to fputs() as NULL. */
    if (fin->header->text != NULL) {
        fputs(fin->header->text, stdout);
    }

    bam1_t* b = bam_init1();
    if (b == NULL) {
        fprintf(stderr, "can't allocate a bam record.\n");
        exit(1);
    }

    uint32_t n = 0;

    while (samread(fin, b) >= 0) {
        if (++n % 1000000 == 0) {
            /* n is unsigned: print it with a matching conversion. */
            fprintf(stderr, "\t%u reads\n", (unsigned)n);
        }

        if (get_hash_table(T, bam1_qname(b), b->core.l_qname) == 1) {
            samwrite(fout, b);
        }
    }

    bam_destroy1(b);
    samclose(fout);
    samclose(fin);

    fprintf(stderr, "done.\n");
}
int tmap_seqs_io_sff2sam_main(int argc, char *argv[]) { int c, help = 0; tmap_seqs_io_t *io_in = NULL; tmap_seqs_t *seqs = NULL; char **sam_rg = NULL; int32_t sam_rg_num = 0; int bidirectional = 0, sam_flowspace_tags = 0; int out_type = 0; tmap_sam_io_t *io_out = NULL; bam_header_t *header = NULL; // BAM Header int32_t i; /* uint8_t *key_seq = NULL; int key_seq_len = 0; */ while((c = getopt(argc, argv, "DGR:Yvh")) >= 0) { switch(c) { case 'D': bidirectional = 1; break; case 'G': break; case 'R': sam_rg = tmap_realloc(sam_rg, (1+sam_rg_num) * sizeof(char*), "sam_rg"); sam_rg[sam_rg_num] = tmap_strdup(optarg); sam_rg_num++; break; case 'Y': sam_flowspace_tags = 1; break; case 'v': tmap_progress_set_verbosity(1); break; case 'h': help = 1; break; default: return 1; } } if(1 != argc - optind || 1 == help) { tmap_file_fprintf(tmap_file_stderr, "Usage: %s %s [-R -Y -v -h] <in.sff>\n", PACKAGE, argv[0]); return 1; } // input io_in = tmap_seqs_io_init(&argv[optind], 1, TMAP_SEQ_TYPE_SFF, TMAP_FILE_NO_COMPRESSION, 0l, 0l); // BAM Header header = tmap_seqs_io_to_bam_header(NULL, io_in, sam_rg, sam_rg_num, argc, argv); // open the output file switch(out_type) { case 0: // SAM io_out = tmap_sam_io_init2("-", "wh", header); break; case 1: io_out = tmap_sam_io_init2("-", "wb", header); break; case 2: io_out = tmap_sam_io_init2("-", "wbu", header); break; default: tmap_bug(); } // destroy the BAM Header bam_header_destroy(header); header = NULL; seqs = tmap_seqs_init(TMAP_SEQ_TYPE_SFF); while(0 < tmap_seqs_io_read(io_in, seqs, io_out->fp->header->header)) { bam1_t *b = NULL; tmap_seq_t *seq = seqs->seqs[0]; b = tmap_sam_convert_unmapped(seq, sam_flowspace_tags, bidirectional, NULL, 0, 0, 0, 0, 0, 0, "\tlq:i:%d\trq:i:%d\tla:i:%d\trq:i:%d", seq->data.sff->rheader->clip_qual_left, seq->data.sff->rheader->clip_qual_right, seq->data.sff->rheader->clip_adapter_left, seq->data.sff->rheader->clip_adapter_right); if(samwrite(io_out->fp, b) <= 0) { tmap_error("Error writing the SAM file", 
Exit, WriteFileError); } bam_destroy1(b); tmap_seqs_destroy(seqs); seqs = tmap_seqs_init(TMAP_SEQ_TYPE_SFF); } tmap_seqs_destroy(seqs); // free memory tmap_seqs_io_destroy(io_in); tmap_sam_io_destroy(io_out); for(i=0;i<sam_rg_num;i++) { free(sam_rg[i]); } free(sam_rg); return 0; }
// Construct from a parameter set and immediately run the whole extraction
// pass: open the input BAM named by Params, open the output BAM files
// selected by the parameters, stream read pairs from the input, route each
// pair to at most one output file, then close every file that was opened.
//
// Output selection:
//   - "ChromRegion" set      -> one ".<region>.bam" file receiving every pair
//                               that overlaps the region (AP).
//   - "ReadPairPosFile" set  -> one ".weirdpairs.bam" file receiving pairs
//                               found in the position list (WP); the five
//                               category files below are switched off.
//   - otherwise              -> fragtail (FT), interchrom (IC), uMult (UM),
//                               uPart (UP) and uUnmapped (UZ) files.
//
// The process exits with a distinct status code on an unknown region (111),
// an empty position list (112), a failed output open (160-165) or a failed
// write (150-156).
BamX::BamX(pars & Params1)  // optional constructor
{
    // parameters
    Params=Params1;
    Nread=0;
    Npair=0;
    Nproper=0;
    Nout=0;
    LFlow=INT_MIN;
    LFhigh=INT_MAX;
    region.limit=false;
    IlluminizeBam=0;
    outFragTailBam=false;
    outInterChromBam=false;
    outUniqueMultipleBam=false;
    outUniquePartialBam=false;
    outUniqueUnmappedBam=false;
    outAllPairsBam=false;
    outReadPairPosBam=false;

    // header of the input BAM, shared as the base of every output header
    bam_header_t *bam_header;

    string s = Params.getInput();
    BamUtil bam1(s);
    Bam = bam1;
    string filename=extractfilename(s);

    // parameters
    string fragPosFile = Params.getString("ReadPairPosFile");
    string r = Params.getString("ChromRegion");
    int maxReads = Params.getInt("MaxReads");
    Qmin = Params.getInt("Qmin");
    LRmin = Params.getInt("MinReadLength");
    maxmismatchPC=Params.getDouble("FractionMaxMisMatches");
    FragLengthWindow=Params.getInt("FragmentLengthWindow");
    int cmd_MateMode=Params.getInt("ReadPairSenseConfig");
    string ReferenceFastaFile=Params.getString("ReferenceFastaFile");
    FragmentTailPercent=Params.getDouble("FragmentTailPercent");
    IlluminizeBam=Params.getInt("Illuminize")>0;
    outputDirectory=Params.getString("OutputDirectory");
    int minLR=Params.getInt("MinReadLength");
    int SplitBracketMin=Params.getInt("SplitBracketMin");
    int SplitBaseQmin=Params.getInt("SplitBaseQmin");
    string StatFile=Params.getString("StatFile");
    if (StatFile.size()>0) {
        hists H1(StatFile);
        hist HLF=H1.h["LF"];
        hist HLR=H1.h["LR"];
        Params.setHist("LF",HLF);
        Params.setHist("LR",HLR);
        H1.h.clear();  // free some memory
        if (FragmentTailPercent>0) {
            LFlow=int(HLF.p2xTrim(FragmentTailPercent/100.));
            LFhigh=int(HLF.p2xTrim(1-FragmentTailPercent/100.));
        }
    }

    int dbg = Params.getInt("Dbg");
    time(&tprev);

    if (ReferenceFastaFile.size()>0) {
        FastaObj RF1(ReferenceFastaFile, "");
        Reference=RF1;
        RF1.seq.clear();  // free some memory
    }

    bam_header= Bam.fp->header;
    string bamheadertext = bam_header->text;
    ReadGroup = extractBamTag(bamheadertext,"@RG");

    outAllPairsBam=(r.size()>0);
    if (!outAllPairsBam) {
        outFragTailBam=true;
        outInterChromBam=true;
        outUniqueMultipleBam=true;
        outUniquePartialBam=true;
        outUniqueUnmappedBam=true;
    }

    // output Bams
    outputBam.clear();

    samfile_t *fpFT=0;
    samfile_t *fpIC=0;
    samfile_t *fpUM=0;
    samfile_t *fpUP=0;
    samfile_t *fpUZ=0;
    samfile_t *fpAP=0;
    samfile_t *fpWP=0;

    // region
    if (r.size()>0) {
        int r1,r2,r3;
        C_region r0(r);
        region=r0;
        string bamRegion=region.region;
        size_t k=bamRegion.find("chr");
        if (k!=string::npos) {
            bamRegion=bamRegion.substr(3);
        }
        if ( bam_parse_region(bam_header, bamRegion.c_str(), &r1, &r2, &r3)==0) {
            region.limit=true;
            region.anchor=r1;
            region.start=r2;
            region.end=r3;
        } else {
            cerr << "region not found\t" << r << endl;
            exit(111);
        }
    }

    // fragPosFile
    if (fragPosFile.size()>0) {
        FragmentPosFileObj fp(fragPosFile);
        if (fp.fragmentPosList.size()>0) {
            FragPos=fp;
        } else {
            cerr << "Read Pair Pos file not found\t" << fragPosFile << endl;
            exit(112);
        }
        outFragTailBam=false;
        outInterChromBam=false;
        outUniqueMultipleBam=false;
        outUniquePartialBam=false;
        outUniqueUnmappedBam=false;
        outReadPairPosBam=true;
    }

    // Open the selected outputs. Each gets its own header built from the
    // input header plus a @PG line; failures report the output path.
    if (outAllPairsBam) {
        string outfile=outputDirectory+"/"+filename+"."+r+".bam";
        string sv=SpannerVersion;
        string q="@PG\tID:Region\tPN:SpannerX\tVN"+sv+"\tCL:"+Params.getCmdLine();
        outputBam["AP"]=BamHeaderContainer(bam_header,q);
        bam_header_t* h1=outputBam["AP"].header();
        if ((fpAP = samopen(outfile.c_str(), "wb", h1)) == 0) {
            fprintf(stderr, "samopen: Fail to open output BAM file %s\n", outfile.c_str());
            exit(160);
        }
    }

    if (outFragTailBam) {
        string outfile=outputDirectory+"/"+filename+".fragtail.bam";
        string sv=SpannerVersion;
        string q="@PG\tID:FragmentTail\tPN:SpannerX\tVN"+sv+"\tCL:"+Params.getCmdLine();
        outputBam["FT"]=BamHeaderContainer(bam_header,q);
        bam_header_t* h1=outputBam["FT"].header();
        if ((fpFT = samopen(outfile.c_str(), "wb", h1)) == 0) {
            fprintf(stderr, "samopen: Fail to open output BAM file %s\n", outfile.c_str());
            exit(161);
        }
    }

    if (outInterChromBam) {
        string outfile=outputDirectory+"/"+filename+".interchrom.bam";
        string sv=SpannerVersion;
        string q="@PG\tID:InterChromPairs\tPN:SpannerX\tVN"+sv+"\tCL:"+Params.getCmdLine();
        outputBam["IC"]=BamHeaderContainer(bam_header,q);
        bam_header_t* h1=outputBam["IC"].header();
        if ((fpIC = samopen(outfile.c_str(), "wb", h1)) == 0) {
            fprintf(stderr, "samopen: Fail to open output BAM file %s\n", outfile.c_str());
            exit(162);
        }
    }

    if (outUniqueMultipleBam) {
        string outfile=outputDirectory+"/"+filename+".uMult.bam";
        string sv=SpannerVersion;
        string q="@PG\tID:uniqMultiplyMappedPairs\tPN:SpannerX\tVN"+sv+"\tCL:"+Params.getCmdLine();
        outputBam["UM"]=BamHeaderContainer(bam_header,q);
        // Read back the entry just stored under "UM"; a different key would
        // default-construct a new, unrelated container.
        bam_header_t* h1=outputBam["UM"].header();
        if ((fpUM = samopen(outfile.c_str(), "wb", h1)) == 0) {
            fprintf(stderr, "samopen: Fail to open output BAM file %s\n", outfile.c_str());
            exit(163);
        }
    }

    if (outUniquePartialBam) {
        string outfile=outputDirectory+"/"+filename+".uPart.bam";
        string sv=SpannerVersion;
        string q="@PG\tID:uniqPartiallyMappedPairs\tPN:SpannerX\tVN"+sv+"\tCL:"+Params.getCmdLine();
        outputBam["UP"]=BamHeaderContainer(bam_header,q);
        bam_header_t* h1=outputBam["UP"].header();
        if ((fpUP = samopen(outfile.c_str(), "wb", h1)) == 0) {
            fprintf(stderr, "samopen: Fail to open output BAM file %s\n", outfile.c_str());
            exit(164);
        }
    }

    if (outUniqueUnmappedBam) {
        string outfile=outputDirectory+"/"+filename+".uUnmapped.bam";
        string sv=SpannerVersion;
        string q="@PG\tID:uniqUnMappedPairs\tPN:SpannerX\tVN"+sv+"\tCL:"+Params.getCmdLine();
        outputBam["UZ"]=BamHeaderContainer(bam_header,q);
        bam_header_t* h1=outputBam["UZ"].header();
        if ((fpUZ = samopen(outfile.c_str(), "wb", h1)) == 0) {
            fprintf(stderr, "samopen: Fail to open output BAM file %s\n", outfile.c_str());
            exit(165);
        }
    }

    if (outReadPairPosBam) {
        string outfile=outputDirectory+"/"+filename+".weirdpairs.bam";
        string sv=SpannerVersion;
        string q="@PG\tID:weirdpairs\tPN:SpannerX\tVN"+sv+"\tCL:"+Params.getCmdLine();
        outputBam["WP"]=BamHeaderContainer(bam_header,q);
        bam_header_t* h1=outputBam["WP"].header();
        if ((fpWP = samopen(outfile.c_str(), "wb", h1)) == 0) {
            fprintf(stderr, "samopen: Fail to open output BAM file %s\n", outfile.c_str());
            exit(165);
        }
    }

    cout << ReadGroup << endl << endl;

    // A non-negative command-line value overrides the mate mode.
    if (cmd_MateMode>=0) MateMode=cmd_MateMode;

    BamContainerPair bampair;
    bool more = true;
    while (more) {

        bampair=Bam.getNextBamPair();

        more=(bampair.BamEnd.size()>1);

        Npair++;
        if (Npair>=maxReads) break;

        // periodic progress report; needs at least one end to print from
        if ( (dbg!=0)&&(elapsedtime()>float(dbg))&&(bampair.BamEnd.size()>0)) {
            time(&tprev);
            cout << " pairs:" << Npair << "\toutput:" << Nout;
            cout << "\tchr:" << bampair.BamEnd[0].b.core.tid+1;
            cout << "\tpos:" << bampair.BamEnd[0].b.core.pos;
            cout << endl;
        }

        if (!more) continue;

        // skip if neither end within region
        if (region.limit) {
            bool overlap = false;
            for (int e=0; e<=1; e++) {
                int a1=bampair.BamEnd[e].b.core.tid;
                int p1=bampair.BamEnd[e].b.core.pos;
                int p2=p1+bampair.BamEnd[e].len;
                overlap=region.overlap(a1,p1,p2);
                if (overlap) break;
            }
            if (!overlap) continue;
        }

        bampair.Illuminize(IlluminizeBam);
        bampair.calcFragmentLengths();

        more=(bampair.BamEnd[1].packeddata.size()>1);

        // true when end 0 is mapped and its flags say the mate is mapped too
        bool bothmap = ((bampair.BamEnd[0].b.core.flag&BAM_FUNMAP)==0)&&((bampair.BamEnd[0].b.core.flag&BAM_FMUNMAP)==0);

        if (outAllPairsBam) {
            Nout++;
            int s1=samwrite(fpAP, &(bampair.BamEnd[0].b));
            int s2=samwrite(fpAP, &(bampair.BamEnd[1].b));
            if ((s1*s2)>0) {
                continue;
            } else {
                cerr << "bad write to pairs.bam" << endl;
                exit(150);
            }
        }

        if (outReadPairPosBam) {
            int ichr1=bampair.BamEnd[0].b.core.tid+1;
            int istd1=bampair.BamEnd[0].sense=='+'? 0: 1;
            int ista1=bampair.BamEnd[0].b.core.pos+1;
            int iq1=bampair.BamEnd[0].q;

            int ichr2=bampair.BamEnd[1].b.core.tid+1;
            int istd2=bampair.BamEnd[1].sense=='+'? 0: 1;
            int ista2=bampair.BamEnd[1].b.core.pos+1;
            int iq2=bampair.BamEnd[1].q;

            FragmentPosObj fp1(0,ichr1,istd1,ista1,0,ichr2,istd2,ista2,0,iq1, iq2,0);

            if (FragPos.find(fp1)) {
                Nout++;
                int s1=samwrite(fpWP, &(bampair.BamEnd[0].b));
                int s2=samwrite(fpWP, &(bampair.BamEnd[1].b));
                if ((s1*s2)>0) {
                    continue;
                } else {
                    cerr << "bad write to weirdpairs.bam" << endl;
                    exit(156);
                }
            }
        }

        // Both ends need more than LRmin bases above SplitBaseQmin. Each
        // end is scanned over its own length so the qualities of end e are
        // never indexed with the length of end 0.
        bool ok[2];
        for (int e=0; e<2; e++) {
            uint8_t* bq=bam1_qual(&(bampair.BamEnd[e].b));
            int LR=bampair.BamEnd[e].b.core.l_qseq;
            double bok=0;
            for (int ib=0; ib<LR; ib++) {
                if (bq[ib]>SplitBaseQmin) {
                    bok++;
                }
            }
            ok[e]=(bok>LRmin);
        }
        if (! (ok[0]&ok[1]) ) continue;

        if ( (outFragTailBam) & ((bampair.BamEnd[0].q>=Qmin)|(bampair.BamEnd[1].q>=Qmin)) ) {
            // NOTE(review): & binds tighter than |, so bothmap only gates
            // the short-fragment test, not the long-fragment one - confirm
            // that this is intended.
            bool FT=(bampair.FragmentLength>LFhigh)|((bampair.FragmentLength<LFlow)&(bampair.FragmentLength>INT_MIN))&bothmap;
            if (FT && (fpFT!=0)) {
                Nout++;
                int s1=samwrite(fpFT, &(bampair.BamEnd[0].b));
                int s2=samwrite(fpFT, &(bampair.BamEnd[1].b));
                if ((s1*s2)>0) {
                    continue;
                } else {
                    cerr << "bad write to fragtail.bam" << endl;
                    exit(151);
                }
            }
        }

        if ((outInterChromBam) & ((bampair.BamEnd[0].q>=Qmin)&(bampair.BamEnd[1].q>=Qmin))) {
            bool IC=(bampair.BamEnd[0].b.core.tid!=bampair.BamEnd[1].b.core.tid)&&bothmap;
            if (IC && (fpIC!=0)) {
                Nout++;
                int s1=samwrite(fpIC, &(bampair.BamEnd[0].b));
                int s2=samwrite(fpIC, &(bampair.BamEnd[1].b));
                if ((s1*s2)>0) {
                    continue;
                } else {
                    cerr << "bad write to interchrom.bam" << endl;
                    exit(152);
                }
            }
        }

        if ((outUniqueMultipleBam) & ((bampair.BamEnd[0].q>=Qmin)|(bampair.BamEnd[1].q>=Qmin))){
            int im=bampair.BamEnd[0].nmap>1? 0: 1;
            int iu=bampair.BamEnd[0].q>=Qmin? 0: 1;
            bool UM=(bampair.BamEnd[iu].nmap>1)&&(iu!=im)&&bothmap;
            if (UM && (fpUM!=0)) {
                Nout++;
                int s1=samwrite(fpUM, &(bampair.BamEnd[0].b));
                int s2=samwrite(fpUM, &(bampair.BamEnd[1].b));
                if ((s1*s2)>0) {
                    continue;
                } else {
                    cerr << "bad write to uMult.bam" << endl;
                    exit(153);
                }
            }
        }

        if ( (outUniquePartialBam) && ((bampair.BamEnd[0].q>=Qmin)|(bampair.BamEnd[1].q>=Qmin)) && bothmap) {
            // End 0: look for a clip boundary with SplitBracketMin bases
            // on either side, all at or above SplitBaseQmin.
            int c0=bampair.BamEnd[0].clip[0]+bampair.BamEnd[0].clip[1];
            int LR=bampair.BamEnd[0].b.core.l_qseq;
            bool split0=((LR-c0)>SplitBracketMin)&(c0>SplitBracketMin);
            int ib0=0;
            if ((split0)&(bampair.BamEnd[0].clip[0]>SplitBracketMin)) {
                ib0=bampair.BamEnd[0].clip[0];
            } else if ((split0)&(bampair.BamEnd[0].clip[1]>SplitBracketMin) ) {
                ib0=LR-bampair.BamEnd[0].clip[1];
            }
            split0=split0&(ib0>0);
            if (split0) {
                uint8_t* bq=bam1_qual(&(bampair.BamEnd[0].b));
                for (int ib=(ib0-SplitBracketMin); ib<(ib0+SplitBracketMin); ib++) {
                    if (bq[ib]<SplitBaseQmin) {
                        split0=false;
                        break;
                    }
                }
            }

            // End 1: same test, using this end's own clip total (c1) so the
            // bracket scan below stays inside this end's quality array.
            int c1=bampair.BamEnd[1].clip[0]+bampair.BamEnd[1].clip[1];
            LR=bampair.BamEnd[1].b.core.l_qseq;
            bool split1=((LR-c1)>SplitBracketMin)&(c1>SplitBracketMin);
            int ib1=0;
            if ((split1)&(bampair.BamEnd[1].clip[0]>SplitBracketMin)) {
                ib1=bampair.BamEnd[1].clip[0];
            } else if ((split1)&(bampair.BamEnd[1].clip[1]>SplitBracketMin) ) {
                ib1=LR-bampair.BamEnd[1].clip[1];
            }
            split1=split1&(ib1>0);
            if (split1) {
                uint8_t* bq=bam1_qual(&(bampair.BamEnd[1].b));
                for (int ib=(ib1-SplitBracketMin); ib<(ib1+SplitBracketMin); ib++) {
                    if (bq[ib]<SplitBaseQmin) {
                        split1=false;
                        break;
                    }
                }
            }

            bool UP=(split0|split1)&((c1+c0)>minLR);
            if (UP && (fpUP!=0)) {
                Nout++;
                int s1=samwrite(fpUP, &(bampair.BamEnd[0].b));
                int s2=samwrite(fpUP, &(bampair.BamEnd[1].b));
                if ((s1*s2)>0) {
                    continue;
                } else {
                    cerr << "bad write to uPart.bam" << endl;
                    exit(154);
                }
            }
        }

        if ( (outUniqueUnmappedBam) & ((bampair.BamEnd[0].q>=Qmin)|(bampair.BamEnd[1].q>=Qmin)) ) {
            // exactly one of the two ends is flagged unmapped
            bool z0=((bampair.BamEnd[0].b.core.flag&BAM_FUNMAP)>0);
            bool z1=((bampair.BamEnd[1].b.core.flag&BAM_FUNMAP)>0);
            bool UZ=(z0|z1)&(!(z1&z0));
            if (UZ && (fpUZ!=0)) {
                Nout++;
                int s1=samwrite(fpUZ, &(bampair.BamEnd[0].b));
                int s2=samwrite(fpUZ, &(bampair.BamEnd[1].b));
                if ((s1*s2)>0) {
                    continue;
                } else {
                    cerr << "bad write to uUnmapped.bam" << endl;
                    exit(155);
                }
            }
        }
    }

    // Close every output that was actually opened, whatever combination of
    // modes was active, so no BAM file is left unflushed.
    if (fpWP!=0) samclose(fpWP);
    if (fpAP!=0) samclose(fpAP);
    if (fpFT!=0) samclose(fpFT);
    if (fpIC!=0) samclose(fpIC);
    if (fpUP!=0) samclose(fpUP);
    if (fpUM!=0) samclose(fpUM);
    if (fpUZ!=0) samclose(fpUZ);

    samclose(Bam.fp);
}
int main () { bam_header_t sheader; sheader.n_targets = 2; char testSeq1[10] = "HELLOSEQ"; char testSeq2[10] = "HELLOSE2"; sheader.target_name = (char**) malloc(sizeof(char*)*2); sheader.target_name[0] = testSeq1; sheader.target_name[1] = testSeq2; sheader.target_len = (uint32_t*) malloc(sizeof(uint32_t)*2); sheader.target_len[0] = 10; sheader.target_len[1] = 10; sheader.hash = NULL; sheader.hash = NULL; sheader.l_text=0; sheader.text = (char*) malloc(sizeof(char)*5); sheader.text = "@TEST"; samfile_t * sptr = samopen("ASD","wh",&sheader); bam1_t salignment; salignment.core.tid = 1; salignment.core.pos = 2456; salignment.core.bin = bam_reg2bin(0,0); salignment.core.qual = 255; //quality salignment.core.l_qname = 6; //lenght of the query name salignment.core.flag = 0; salignment.core.n_cigar = 1; salignment.core.l_qseq = 8; //length of the read salignment.core.mtid = 0; salignment.core.mpos = 1357; salignment.core.isize = 1; salignment.l_aux = 0; salignment.data_len = 0; salignment.m_data = 255; salignment.data = malloc(sizeof(uint8_t)*255); //Name SAMIUint8ConcatString(salignment.data,&(salignment.data_len),"QNAME",6); //CIGAR SAMIUint8ConcatUint32(salignment.data,&(salignment.data_len),128); //Read SAMIUint8ConcatUint8(salignment.data,&(salignment.data_len),17); SAMIUint8ConcatUint8(salignment.data,&(salignment.data_len),17); SAMIUint8ConcatUint8(salignment.data,&(salignment.data_len),34); SAMIUint8ConcatUint8(salignment.data,&(salignment.data_len),34); //Quality SAMIUint8ConcatUint8(salignment.data,&(salignment.data_len),0xff); SAMIUint8ConcatUint8(salignment.data,&(salignment.data_len),0xff); SAMIUint8ConcatUint8(salignment.data,&(salignment.data_len),0xff); SAMIUint8ConcatUint8(salignment.data,&(salignment.data_len),0xff); SAMIUint8ConcatUint8(salignment.data,&(salignment.data_len),0xff); SAMIUint8ConcatUint8(salignment.data,&(salignment.data_len),0xff); SAMIUint8ConcatUint8(salignment.data,&(salignment.data_len),0xff); 
SAMIUint8ConcatUint8(salignment.data,&(salignment.data_len),0xff); unsigned int auxStart = salignment.data_len; SAMIUint8ConcatString(salignment.data,&(salignment.data_len),"XAZTESTING",11); salignment.l_aux += salignment.data_len - auxStart; //salignment.data = "QNAME\0\0\0\0\0AAAACCCC!!!!!!!!ASD"; unsigned int * asd = bam1_cigar(&salignment); printf("%u\n",(uint32_t) (*asd)); samwrite(sptr,&salignment); salignment.core.tid = 0; salignment.core.pos = 1357; samwrite(sptr,&salignment); samclose(sptr); return 0; }
/*
 * Append the current first character of the ZS value as an "OS" tag, then
 * overwrite that character with `replacement`.
 *
 * `zs_value` points at the ZS value inside b->data. bam_aux_append() may
 * reallocate b->data, so the old character is copied out first and the ZS
 * tag is looked up again afterwards instead of reusing `zs_value`.
 */
static void bsstrand_archive_and_set(bam1_t *b, const uint8_t *zs_value, char replacement)
{
    const char zs_key[2] = {'Z','S'};
    uint8_t previous = zs_value[0];
    uint8_t *zs;

    bam_aux_append(b, "OS", 'A', 1, &previous);

    zs = bam_aux_get(b, zs_key);
    if (zs) zs[1] = (uint8_t)replacement;  /* zs[0] is the type byte */
}

/*
 * Per-read callback: count C>T and G>A mismatches against the reference
 * over the aligned (M) segments of the read and use them to check, correct
 * or infer the ZS bisulfite-strand tag.
 *
 * Unmapped reads are passed through to `out` (when non-NULL) and counted in
 * d->n_unmapped. For mapped reads that carry a ZS tag:
 *   - both counts > 1 and similarity > 0.5: first ZS character set to '?',
 *     d->n_fail incremented;
 *   - ZS starts with '+' but G>A exceeds C>T by more than 2: set to '-',
 *     d->n_corr incremented;
 *   - ZS starts with '-' but C>T exceeds G>A by more than 2: set to '+',
 *     d->n_corr incremented.
 * In each of those cases the previous character is kept in an OS tag.
 * Reads without ZS get one appended when conf->infer_bsstrand is set.
 * Diagnostic lines go to stdout depending on conf->output_read and
 * conf->output_all_read.
 *
 * CIGAR operations other than M, I, D and S abort the program.
 * Always returns 0.
 */
int bsstrand_func(bam1_t *b, const samfile_t *in, samfile_t *out, void *data)
{
    bsstrand_data_t *d = (bsstrand_data_t*)data;
    bsstrand_conf_t *conf = d->conf;
    const bam1_core_t *c = &b->core;

    if (c->flag & BAM_FUNMAP){
        if (out) samwrite(out, b);
        d->n_unmapped++;
        return 0;
    }

    fetch_refseq(d->rs, in->header->target_name[c->tid], c->pos, c->pos+1);
    uint32_t rpos=c->pos+1, qpos=0;
    int i, nC2T = 0, nG2A = 0;
    uint32_t j;
    char rbase, qbase;

    for (i=0; i<c->n_cigar; ++i) {
        uint32_t op = bam_cigar_op(bam1_cigar(b)[i]);
        uint32_t oplen = bam_cigar_oplen(bam1_cigar(b)[i]);
        switch(op) {
        case BAM_CMATCH:
            for(j=0; j<oplen; ++j) {
                rbase = toupper(getbase_refseq(d->rs, rpos+j));
                qbase = bscall(bam1_seq(b), qpos+j);
                if (rbase == 'C' && qbase == 'T') nC2T += 1;
                if (rbase == 'G' && qbase == 'A') nG2A += 1;
            }
            rpos += oplen;
            qpos += oplen;
            break;
        case BAM_CINS:
            qpos += oplen;
            break;
        case BAM_CDEL:
            rpos += oplen;
            break;
        case BAM_CSOFT_CLIP:
            qpos += oplen;
            break;
        default:
            fprintf(stderr, "Unknown cigar, %u\n", op);
            abort();
        }
    }

    char key[2] = {'Z','S'};
    unsigned char *bsstrand = bam_aux_get(b, key);
    if (bsstrand) {
        bsstrand++;  /* skip the type byte; now points at the ZS value */
        double s = similarity(nG2A, nC2T);
        if (nG2A > 1 && nC2T > 1 && s > 0.5) {
            if (conf->output_read || conf->output_all_read)
                printf("F\t%s\t%d\t%d\t%d\t%s\t%s\t%1.2f\n", in->header->target_name[c->tid], c->pos, nC2T, nG2A, bam1_qname(b), bsstrand, s);
            bsstrand_archive_and_set(b, bsstrand, '?');
            d->n_fail++;
        } else if (*bsstrand == '+' && nG2A > nC2T + 2) {
            if (conf->output_read || conf->output_all_read)
                printf("W2C\t%s\t%d\t%d\t%d\t%s\t%s\t%1.2f\n", in->header->target_name[c->tid], c->pos, nC2T, nG2A, bam1_qname(b), bsstrand, s);
            bsstrand_archive_and_set(b, bsstrand, '-');
            d->n_corr++;
        } else if (*bsstrand == '-' && nC2T > nG2A + 2) {
            if (conf->output_read || conf->output_all_read)
                printf("C2W\t%s\t%d\t%d\t%d\t%s\t%s\t%1.2f\n", in->header->target_name[c->tid], c->pos, nC2T, nG2A, bam1_qname(b), bsstrand, s);
            bsstrand_archive_and_set(b, bsstrand, '+');
            d->n_corr++;
        } else if (conf->output_all_read) {
            printf("N\t%s\t%d\t%d\t%d\t%s\t%s\t%1.2f\n", in->header->target_name[c->tid], c->pos, nC2T, nG2A, bam1_qname(b), bsstrand, s);
        }
    } else if (!(c->flag & BAM_FUNMAP) && conf->infer_bsstrand) {
        char bss[3];
        if (similarity(nG2A, nC2T) < 0.5) {
            strcpy(bss, "??");
        } else if (nC2T > nG2A) {
            strcpy(bss, c->flag & BAM_FREVERSE ? "+-" : "++");
        } else {
            strcpy(bss, c->flag & BAM_FREVERSE ? "-+" : "--");
        }
        bam_aux_append(b, "ZS", 'Z', 3, (uint8_t*) bss);
    }

    if (out) samwrite(out, b);
    d->n_mapped++;

    return 0;
}
/*
 * Stream the BAM file `fn` and write to stdout (as SAM text) every record
 * whose mate-suffixed name is found in `T`.
 *
 * For each record a key is built from the first core.l_qname bytes of the
 * query name followed by "/2" when BAM_FREAD2 is set and "/1" otherwise;
 * the record is written when get_hash_table() returns 1 for that key with
 * length l_qname + 2.
 *
 * The input header text is copied to stdout before any record. Progress is
 * reported on stderr every million reads. Exits with status 1 if the input,
 * stdout, or a buffer cannot be set up.
 */
void filter_by_id(const char* fn, hash_table* T)
{
    fprintf(stderr, "filtering ... \n");

    samfile_t* fin = samopen(fn, "rb", NULL);
    if (fin == NULL) {
        fprintf(stderr, "can't open bam file %s\n", fn);
        exit(1);
    }

    samfile_t* fout = samopen("-", "w", (void*)fin->header);
    if (fout == NULL) {
        fprintf(stderr, "can't open stdout, for some reason.\n");
        exit(1);
    }

    /* A header without text must not be handed to fputs() as NULL. */
    if (fin->header->text != NULL) {
        fputs(fin->header->text, stdout);
    }

    bam1_t* b = bam_init1();
    if (b == NULL) {
        fprintf(stderr, "can't allocate a bam record.\n");
        exit(1);
    }

    uint32_t n = 0;

    char* qname = NULL;
    size_t qname_size = 0;

    while (samread(fin, b) >= 0) {
        const size_t name_len = (size_t)b->core.l_qname;

        if (++n % 1000000 == 0) {
            /* n is unsigned: print it with a matching conversion. */
            fprintf(stderr, "\t%u reads\n", (unsigned)n);
        }

        /* Grow the scratch buffer to hold the name plus "/N" and a NUL.
         * The old pointer is kept until realloc() is known to succeed. */
        if (qname_size < name_len + 3) {
            char* grown = realloc(qname, name_len + 3);
            if (grown == NULL) {
                fprintf(stderr, "can't allocate memory for read names.\n");
                exit(1);
            }
            qname = grown;
            qname_size = name_len + 3;
        }

        /* NOTE(review): in the BAM format l_qname counts the name's
         * terminating NUL, so the '/' below lands after that NUL rather
         * than replacing it - confirm this matches how the keys in T
         * were built. */
        memcpy(qname, bam1_qname(b), name_len);
        qname[name_len]     = '/';
        qname[name_len + 1] = (b->core.flag & BAM_FREAD2) ? '2' : '1';
        qname[name_len + 2] = '\0';

        if (get_hash_table(T, qname, b->core.l_qname + 2) == 1) {
            samwrite(fout, b);
        }
    }

    free(qname);
    bam_destroy1(b);
    samclose(fout);
    samclose(fin);

    fprintf(stderr, "done.\n");
}