/*
 * Builds the template CIGAR of a read pair and stores it as a "CT" ('Z')
 * aux tag on the read with the smaller coordinate.
 *
 * Text written to str:
 *   <1|2><F|R><cigar of leftmost read><gap>T<1|2><F|R><cigar of other read>
 * where <gap> is the other read's position minus the value bam_calend()
 * returns for the leftmost read.
 *
 * str is scratch space and is reset (l = 0) on entry. Nothing is written
 * when the reads are on different references, have no reference or
 * coordinate, or when either read is flagged unmapped.
 */
void bam_template_cigar(bam1_t *b1, bam1_t *b2, kstring_t *str)
{
    bam1_t *swap;
    int i, end;
    uint32_t *cigar;
    uint8_t *old_tag;

    str->l = 0;
    /* coordinateless, unmapped or not on the same chr; skip */
    if (b1->core.tid != b2->core.tid || b1->core.tid < 0
        || b1->core.pos < 0 || b2->core.pos < 0
        || (b1->core.flag & BAM_FUNMAP) || (b2->core.flag & BAM_FUNMAP))
        return;
    /* make sure b1 has a smaller coordinate */
    if (b1->core.pos > b2->core.pos) swap = b1, b1 = b2, b2 = swap;

    kputc((b1->core.flag & BAM_FREAD1)? '1' : '2', str);   /* segment index */
    kputc((b1->core.flag & BAM_FREVERSE)? 'R' : 'F', str); /* strand */
    for (i = 0, cigar = bam1_cigar(b1); i < b1->core.n_cigar; ++i) {
        kputw(bam_cigar_oplen(cigar[i]), str);
        kputc(bam_cigar_opchr(cigar[i]), str);
    }

    /* cigar still points at b1's CIGAR here */
    end = bam_calend(&b1->core, cigar);
    kputw(b2->core.pos - end, str);
    kputc('T', str);

    kputc((b2->core.flag & BAM_FREAD1)? '1' : '2', str);   /* segment index */
    kputc((b2->core.flag & BAM_FREVERSE)? 'R' : 'F', str); /* strand */
    for (i = 0, cigar = bam1_cigar(b2); i < b2->core.n_cigar; ++i) {
        kputw(bam_cigar_oplen(cigar[i]), str);
        kputc(bam_cigar_opchr(cigar[i]), str);
    }

    /* drop stale tags first so repeated calls do not stack duplicates */
    if ((old_tag = bam_aux_get(b1, "CT")) != 0) bam_aux_del(b1, old_tag);
    if ((old_tag = bam_aux_get(b2, "CT")) != 0) bam_aux_del(b2, old_tag);
    bam_aux_append(b1, "CT", 'Z', str->l+1, (uint8_t*)str->s);
}
/*
 * Computes the "ct" tag for two records assumed to come from the same
 * template and attaches it to whichever record has the smaller position.
 * Any "ct" tag already present on either record is removed first.
 *
 * str is scratch space; it is reset on entry. Nothing else happens when the
 * records are on different references, lack a reference or position, or
 * when either one is flagged unmapped.
 */
static void bam_template_cigar(bam1_t *b1, bam1_t *b2, kstring_t *str)
{
    bam1_t *left = b1, *right = b2;
    bam1_t *segment[2];
    uint8_t *stale;
    uint32_t *ops;
    int which, k, left_end;

    str->l = 0;
    if (b1->core.tid != b2->core.tid) return;               // not on the same chr
    if (b1->core.tid < 0 || b1->core.pos < 0 || b2->core.pos < 0) return; // coordinateless
    if ((b1->core.flag & BAM_FUNMAP) || (b2->core.flag & BAM_FUNMAP)) return;

    // the record with the smaller coordinate goes first
    if (b1->core.pos > b2->core.pos) {
        left = b2;
        right = b1;
    }
    segment[0] = left;
    segment[1] = right;

    for (which = 0; which < 2; ++which) {
        bam1_t *rec = segment[which];
        if (which == 1) {
            // gap between the two segments, terminated by 'T'
            left_end = bam_endpos(left);
            kputw(right->core.pos - left_end, str);
            kputc('T', str);
        }
        kputc((rec->core.flag & BAM_FREAD1) ? '1' : '2', str);   // segment index
        kputc((rec->core.flag & BAM_FREVERSE) ? 'R' : 'F', str); // strand
        ops = bam_get_cigar(rec);
        for (k = 0; k < rec->core.n_cigar; ++k) {
            kputw(bam_cigar_oplen(ops[k]), str);
            kputc(bam_cigar_opchr(ops[k]), str);
        }
    }

    stale = bam_aux_get(left, "ct");
    if (stale != NULL) bam_aux_del(left, stale);
    stale = bam_aux_get(right, "ct");
    if (stale != NULL) bam_aux_del(right, stale);

    bam_aux_append(left, "ct", 'Z', str->l+1, (uint8_t*)str->s);
}
/**
 * Writes a string representation of this Genome Interval into interval.
 *
 * The buffer is reset first. The sequence name is always written; the
 * ":start1-end1" suffix is omitted only when start1 is 1 and end1 is
 * (1<<29)-1.
 */
void GenomeInterval::to_string(kstring_t *interval)
{
    interval->l = 0;
    kputs(seq.c_str(), interval);

    const bool is_default_range = (start1 == 1) && (end1 == ((1<<29)-1));
    if (is_default_range)
    {
        return;
    }

    kputc(':', interval);
    kputw(start1, interval);
    kputc('-', interval);
    kputw(end1, interval);
}
/**
 * Formats one tab-separated text line per kept site into a freshly
 * allocated kstring_t and stores it in pars->extras[index].
 *
 * Columns: chromosome, 1-based position, major, minor, optional ref and anc,
 * the frequency estimates selected by the doMaf bit mask, the matching SNP
 * columns when doSNP is set, and finally keepSites[site].
 */
void frequency::prepPrint(funkyPars *pars)
{
  kstring_t *out = new kstring_t;
  out->s = NULL;
  out->l = out->m = 0;

  for (int site = 0; site < pars->numSites; site++) {
    if (pars->keepSites[site] == 0)
      continue;

    // chr, pos, major, minor
    kputs(header->name[pars->refId], out);
    kputc('\t', out);
    kputw(pars->posi[site] + 1, out);
    kputc('\t', out);
    kputc(intToRef[pars->major[site]], out);
    kputc('\t', out);
    kputc(intToRef[pars->minor[site]], out);
    kputc('\t', out);

    // ref and anc, when available
    if (pars->ref != NULL) {
      kputc(intToRef[pars->ref[site]], out);
      kputc('\t', out);
    }
    if (pars->anc != NULL) {
      kputc(intToRef[pars->anc[site]], out);
      kputc('\t', out);
    }

    // frequency estimates, one column per enabled doMaf bit
    if (doMaf & 1)
      ksprintf(out, "%f\t", pars->results->freq->pml[site]);
    if (doMaf & 2)
      ksprintf(out, "%f\t", pars->results->freq->pEM[site]);
    if (doMaf & 4)
      ksprintf(out, "%f\t", pars->results->freq->pmlun[site]);
    if (doMaf & 8)
      ksprintf(out, "%f\t", pars->results->freq->pEMun[site]);
    if (doMaf & 16)
      ksprintf(out, "%f\t", pars->results->asso->freq[site]);
    if (doMaf & 32)
      ksprintf(out, "%f\t", pars->phat[site]);

    // SNP columns for the first four estimators
    if (doSNP) {
      if (doMaf & 1)
        ksprintf(out, "%f\t", pars->results->freq->pmlSNP[site]);
      if (doMaf & 2)
        ksprintf(out, "%f\t", pars->results->freq->pEMSNP[site]);
      if (doMaf & 4)
        ksprintf(out, "%f\t", pars->results->freq->pmlunSNP[site]);
      if (doMaf & 8)
        ksprintf(out, "%f\t", pars->results->freq->pEMunSNP[site]);
    }

    kputw(pars->keepSites[site], out);
    kputc('\n', out);
  }

  pars->extras[index] = out;
}
/** * String version of BED record. */ std::string BEDRecord::to_string() { kstring_t s = {0,0,0}; kputs(this->chrom.c_str(), &s); kputc(':', &s); kputw(this->beg1, &s); kputc('-', &s); kputw(this->end1, &s); std::string str(s.s); if (s.m) free(s.s); return str; };
/** * Returns a string representation of this Genome Interval. */ std::string GenomeInterval::to_string() { kstring_t s = {0,0,0}; kputs(seq.c_str(), &s); if (start1!=1 || end1!=((1<<29)-1)) { kputc(':', &s); kputw(start1, &s); kputc('-', &s); kputw(end1, &s); } std::string interval(s.s); if (s.m) free(s.s); return interval; };
/**
 * Gets a sorted string representation of a variant.
 *
 * Writes "<chrom>:<pos1>:<allele 0>,<remaining alleles sorted with cmpstr>"
 * into var, which is reset first. The first allele always stays in front;
 * only the alleles after it are sorted.
 *
 * @param h   header used to resolve the chromosome name
 * @param v   record; unpacked up to BCF_UN_STR by this call
 * @param var output buffer, overwritten
 */
void bcf_variant2string_sorted(bcf_hdr_t *h, bcf1_t *v, kstring_t *var)
{
    // NOTE(review): this prints the record on every call and looks like
    // leftover debugging; kept so observable output does not change.
    bcf_print_liten(h,v);

    bcf_unpack(v, BCF_UN_STR);
    var->l = 0;
    kputs(bcf_get_chrom(h, v), var);
    kputc(':', var);
    kputw(bcf_get_pos1(v), var);
    kputc(':', var);

    const int32_t n_allele = bcf_get_n_allele(v);
    if (n_allele < 1)
    {
        return; // no alleles to print
    }

    kputs(bcf_get_alt(v, 0), var);

    const int32_t n_rest = n_allele - 1;
    if (n_rest == 0)
    {
        return;
    }

    char** allele = bcf_get_allele(v);
    if (n_rest == 1)
    {
        // a single remaining allele needs no sorting
        kputc(',', var);
        kputs(allele[1], var);
        return;
    }

    char** sorted = (char**) malloc(n_rest*sizeof(char*));
    if (sorted == NULL)
    {
        std::abort(); // out of memory
    }

    // sorted[] is 0-based while the remaining alleles start at allele[1]
    for (int32_t i=1; i<n_allele; ++i)
    {
        sorted[i-1] = allele[i];
    }
    std::qsort(sorted, n_rest, sizeof(char*), cmpstr);

    for (int32_t i=0; i<n_rest; ++i)
    {
        kputc(',', var);
        kputs(sorted[i], var);
    }
    free(sorted);
}
/*
 * Prints n unitigs to stdout, one four-line record each:
 *   @<2i>:<2i+1> TAB nsr TAB <first overlap list> TAB <second overlap list>
 *   sequence
 *   +
 *   coverage string
 * Each overlap is written as "<id<<1|to>,<len>;". An empty list is written
 * as ".".
 */
void fml_utg_print(int n, const fml_utg_t *utg)
{
    kstring_t line = {0,0,0};
    int idx;

    for (idx = 0; idx < n; ++idx) {
        const fml_utg_t *u = &utg[idx];
        int k;

        line.l = 0;

        /* header: the two vertex ids and the read count */
        kputc('@', &line);
        kputw(idx<<1|0, &line);
        kputc(':', &line);
        kputw(idx<<1|1, &line);
        kputc('\t', &line);
        kputw(u->nsr, &line);
        kputc('\t', &line);

        /* first overlap list: the leading n_ovlp[0] entries */
        k = 0;
        while (k < u->n_ovlp[0]) {
            kputw(u->ovlp[k].id<<1|u->ovlp[k].to, &line);
            kputc(',', &line);
            kputw(u->ovlp[k].len, &line);
            kputc(';', &line);
            ++k;
        }
        if (u->n_ovlp[0] == 0) kputc('.', &line);
        kputc('\t', &line);

        /* second overlap list: continues where the first stopped */
        while (k < u->n_ovlp[0] + u->n_ovlp[1]) {
            kputw(u->ovlp[k].id<<1|u->ovlp[k].to, &line);
            kputc(',', &line);
            kputw(u->ovlp[k].len, &line);
            kputc(';', &line);
            ++k;
        }
        if (u->n_ovlp[1] == 0) kputc('.', &line);
        kputc('\n', &line);

        /* sequence, separator, per-base coverage */
        kputsn(u->seq, u->len, &line);
        kputsn("\n+\n", 3, &line);
        kputsn(u->cov, u->len, &line);
        kputc('\n', &line);

        fputs(line.s, stdout);
    }
    free(line.s);
}
/*
 * "bgt getalt <bgt-base>": reads <bgt-base>.bcf and prints one line per
 * record in the form
 *   <contig>:<pos>:<rlen>:<alt>
 * after trimming the prefix shared by REF and ALT (pos is 1-based and is
 * advanced by the trimmed length, rlen is reduced by it).
 *
 * Returns 0 on success and 1 on a usage error or when the input cannot be
 * opened or its header cannot be read.
 */
int main_getalt(int argc, char *argv[])
{
    int c;
    char *fn;
    size_t fn_size;
    BGZF *fp;
    bcf1_t *b;
    bcf_hdr_t *h;
    kstring_t s = {0,0,0};

    while ((c = getopt(argc, argv, "")) >= 0) { }
    if (argc - optind == 0) {
        fprintf(stderr, "Usage: bgt getalt <bgt-base>\n");
        return 1;
    }

    /* room for the base name, ".bcf" and the terminating NUL */
    fn_size = strlen(argv[optind]) + 5;
    fn = (char*)calloc(fn_size, 1);
    if (fn == 0) {
        fprintf(stderr, "[E::%s] out of memory\n", __func__);
        return 1;
    }
    snprintf(fn, fn_size, "%s.bcf", argv[optind]);

    /* report failures explicitly: an assert() vanishes under NDEBUG */
    fp = bgzf_open(fn, "r");
    if (fp == 0) {
        fprintf(stderr, "[E::%s] failed to open '%s'\n", __func__, fn);
        free(fn);
        return 1;
    }
    free(fn);

    h = bcf_hdr_read(fp);
    if (h == 0) {
        fprintf(stderr, "[E::%s] failed to read the BCF header\n", __func__);
        bgzf_close(fp);
        return 1;
    }

    b = bcf_init1();
    while (bcf_read1(fp, b) >= 0) {
        char *ref, *alt;
        int l_ref, l_alt, i, min_l;
        bcf_get_ref_alt1(b, &l_ref, &ref, &l_alt, &alt);
        min_l = l_ref < l_alt? l_ref : l_alt;
        /* i = length of the common REF/ALT prefix */
        for (i = 0; i < min_l && ref[i] == alt[i]; ++i);
        s.l = 0;
        kputs(h->id[BCF_DT_CTG][b->rid].key, &s); kputc(':', &s);
        kputw(b->pos + 1 + i, &s); kputc(':', &s);
        kputw(b->rlen - i, &s); kputc(':', &s);
        kputsn(alt + i, l_alt - i, &s);
        puts(s.s);
    }
    bcf_destroy1(b);
    bcf_hdr_destroy(h);
    bgzf_close(fp);
    free(s.s);
    return 0;
}
/*
 * Appends a text rendering of an n-element typed vector to s.
 *
 *  - n == 0: "." is written.
 *  - BCF_BT_CHAR: characters are copied until n is reached or a NUL is met.
 *  - numeric types: when the first element is the type's missing value (or
 *    n is not positive) a single "." is written; otherwise all n elements
 *    are written comma-separated, with "." for each missing one.
 *  - any other type: a message is printed to stderr and the process exits
 *    with status 1.
 */
void bcf_fmt_array(kstring_t *s, int n, int type, void *data)
{
    int j = 0;

    if (n == 0) {
        kputc('.', s);
        return;
    }

    if (type == BCF_BT_CHAR) {
        const char *text = (char*)data;
        for (j = 0; j < n && text[j]; ++j) kputc(text[j], s);
        return;
    }

    /* is_missing and kprint are expressions that refer to the cursor `p` */
#define FMT_NUMERIC(type_t, is_missing, kprint) do { \
        type_t *p = (type_t *) data; \
        if (n < 1 || (is_missing)) { \
            kputc('.', s); \
        } else { \
            for (j = 0; j < n; j++, p++) { \
                if (j) kputc(',', s); \
                if (is_missing) kputc('.', s); \
                else kprint; \
            } \
        } \
    } while (0)

    switch (type) {
        case BCF_BT_INT8:
            FMT_NUMERIC(int8_t, *p==INT8_MIN, kputw(*p, s));
            break;
        case BCF_BT_INT16:
            FMT_NUMERIC(int16_t, *p==INT16_MIN, kputw(*p, s));
            break;
        case BCF_BT_INT32:
            FMT_NUMERIC(int32_t, *p==INT32_MIN, kputw(*p, s));
            break;
        case BCF_BT_FLOAT:
            FMT_NUMERIC(float, *(uint32_t*)p==bcf_missing_float, ksprintf(s, "%g", *p));
            break;
        default:
            fprintf(stderr,"todo: type %d\n", type);
            exit(1);
            break;
    }
#undef FMT_NUMERIC
}
void printAuxBuffered(uint8_t *s, uint8_t *sStop,kstring_t &str ) { // fprintf(stderr,"\ncomp:%p vs %p\n",s,sStop); while (s < sStop) { uint8_t type; kputc('\t', &str);kputc(s[0], &str);kputc(s[1], &str); kputc(':', &str); // fprintf(stderr,"\t%c%c:",s[0],s[1]); s += 2; type = *s; ++s; // fprintf(stderr,"\ntype=%c\n",type);//,(char)*s); // kputc('\t', &str); kputsn((char*)key, 2, &str); kputc(':', &str); if (type == 'A') { kputsn("A:", 2, &str); kputc(*s, &str); ++s; } else if (type == 'C') { kputsn("i:", 2, &str); kputw(*s, &str); ++s; } else if (type == 'c') { kputsn("i:", 2, &str); kputw(*(int8_t*)s, &str); ++s; } else if (type == 'S') { kputsn("i:", 2, &str); kputw(*(uint16_t*)s, &str); s += 2; } else if (type == 's') { kputsn("i:", 2, &str); kputw(*(int16_t*)s, &str); s += 2; } else if (type == 'I') { kputsn("i:", 2, &str); kputuw(*(uint32_t*)s, &str); s += 4; } else if (type == 'i') { kputsn("i:", 2, &str); kputw(*(int32_t*)s, &str); s += 4; } else if (type == 'f') { ksprintf(&str, "f:%g", *(float*)s); s += 4; } else if (type == 'd') { ksprintf(&str, "d:%lg", *(double*)s); s += 8; } else if (type == 'Z' || type == 'H') { kputc(type, &str); kputc(':', &str); while (*s) kputc(*s++, &str); ++s; } else if (type == 'B') { uint8_t sub_type = *(s++); int32_t n; memcpy(&n, s, 4); s += 4; // no point to the start of the array kputc(type, &str); kputc(':', &str); kputc(sub_type, &str); // write the typing for (int i = 0; i < n; ++i) { kputc(',', &str); if ('c' == sub_type || 'c' == sub_type) { kputw(*(int8_t*)s, &str); ++s; } else if ('C' == sub_type) { kputw(*(uint8_t*)s, &str); ++s; } else if ('s' == sub_type) { kputw(*(int16_t*)s, &str); s += 2; } else if ('S' == sub_type) { kputw(*(uint16_t*)s, &str); s += 2; } else if ('i' == sub_type) { kputw(*(int32_t*)s, &str); s += 4; } else if ('I' == sub_type) { kputuw(*(uint32_t*)s, &str); s += 4; } else if ('f' == sub_type) { ksprintf(&str, "%g", *(float*)s); s += 4; } } } } // fprintf(stderr,"done\n"); }
/**
 * Writes the fuzzy-alignment description of the underlying VNTR into s as
 * "chrom:fuzzy_beg1:fuzzy_repeat_tract:<VNTR>:motif". s is reset first.
 */
void Variant::get_fuzzy_vntr_string(kstring_t* s)
{
    const char field_sep = ':';

    s->l = 0;

    kputs(chrom.c_str(), s);
    kputc(field_sep, s);

    kputw(vntr.fuzzy_beg1, s);
    kputc(field_sep, s);

    kputs(vntr.fuzzy_repeat_tract.c_str(), s);
    kputc(field_sep, s);

    kputs("<VNTR>", s);
    kputc(field_sep, s);

    kputs(vntr.motif.c_str(), s);
}
/**
 * Writes "<chrom>:<pos1>:<allele0>,<allele1>,..." for a variant into var.
 *
 * The record is unpacked up to BCF_UN_STR and var is reset before writing.
 */
void bcf_variant2string(bcf_hdr_t *h, bcf1_t *v, kstring_t *var)
{
    bcf_unpack(v, BCF_UN_STR);

    var->l = 0;
    kputs(bcf_get_chrom(h, v), var);
    kputc(':', var);
    kputw(bcf_get_pos1(v), var);
    kputc(':', var);

    int32_t idx = 0;
    while (idx < v->n_allele)
    {
        // separator goes before every allele except the first
        if (idx != 0)
        {
            kputc(',', var);
        }
        kputs(bcf_get_alt(v, idx), var);
        ++idx;
    }
}
/**
 * Writes the CIGAR of a BAM record into cigar_string as
 * <length><op char> pairs. The buffer is reset first and is left empty
 * when the record has no CIGAR operations.
 */
void bam_get_cigar_string(bam1_t *s, kstring_t *cigar_string)
{
    cigar_string->l=0;

    const int32_t n_ops = bam_get_n_cigar_op(s);
    if (!n_ops)
    {
        return;
    }

    uint32_t *ops = bam_get_cigar(s);
    int32_t k = 0;
    while (k < n_ops)
    {
        const uint32_t op = ops[k];
        kputw(bam_cigar_oplen(op), cigar_string);
        kputc(bam_cigar_opchr(op), cigar_string);
        ++k;
    }
}
// Appends the CIGAR of b to str as <length><op char> pairs.
// A record without CIGAR operations is written as "*" rather than "".
// Returns 0 on success, -1 as soon as any append reports EOF.
static int bam_format_cigar(const bam1_t* b, kstring_t* str)
{
    const uint32_t *ops;
    uint32_t k;

    if (b->core.n_cigar == 0) {
        // special case: empty CIGAR
        if (kputc('*', str) == EOF)
            return -1;
        return 0;
    }

    ops = bam_get_cigar(b);
    k = 0;
    while (k < b->core.n_cigar) {
        const uint32_t op = ops[k];
        if (kputw(bam_cigar_oplen(op), str) == EOF)
            return -1;
        if (kputc(bam_cigar_opchr(op), str) == EOF)
            return -1;
        ++k;
    }
    return 0;
}
/** * Gets a string representation of the variant. */ std::string Variant::get_variant_string() { kstring_t var = {0,0,0}; bcf_unpack(v, BCF_UN_STR); var.l = 0; kputs(bcf_get_chrom(h, v), &var); kputc(':', &var); kputw(bcf_get_pos1(v), &var); kputc(':', &var); for (size_t i=0; i<bcf_get_n_allele(v); ++i) { if (i) kputc('/', &var); kputs(bcf_get_alt(v, i), &var); } std::string str(var.s); if (var.m) free(var.s); return str; }
/*
 * Formats every alignment region in a as one tab-separated overlap line and
 * hands the resulting buffer to s->sam.
 *
 * Columns: read name, read length, query begin, query end, reference name,
 * reference length, reference begin, reference end, and truesc / opt->a
 * divided by the longer of the query and reference spans (3 decimals).
 * The query begin/end are exchanged when bns_depos() reports the reverse
 * strand. pac is not used here.
 */
void mem_reg2ovlp(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a)
{
	kstring_t out = {0,0,0};
	int k;

	for (k = 0; k < a->n; ++k) {
		const mem_alnreg_t *reg = &a->a[k];
		int q_beg = reg->qb, q_end = reg->qe;
		int64_t r_beg = reg->rb, r_end = reg->re;
		int64_t r_pos, longest;
		int on_reverse, ctg;

		/* reference coordinate on the forward strand, relative to its contig */
		r_pos = bns_depos(bns, r_beg < bns->l_pac? r_beg : r_end - 1, &on_reverse);
		ctg = bns_pos2rid(bns, r_pos);
		assert(ctg == reg->rid);
		r_pos -= bns->anns[ctg].offset;

		if (on_reverse) { /* exchange the query end points */
			int tmp = q_beg;
			q_beg = q_end;
			q_end = tmp;
		}

		kputs(s->name, &out);
		kputc('\t', &out);
		kputw(s->l_seq, &out);
		kputc('\t', &out);
		kputw(q_beg, &out);
		kputc('\t', &out);
		kputw(q_end, &out);
		kputc('\t', &out);
		kputs(bns->anns[ctg].name, &out);
		kputc('\t', &out);
		kputw(bns->anns[ctg].len, &out);
		kputc('\t', &out);
		kputw(r_pos, &out);
		kputc('\t', &out);
		kputw(r_pos + (r_end - r_beg), &out);
		kputc('\t', &out);

		/* normalise by whichever span is longer */
		longest = (q_end - q_beg > r_end - r_beg)? q_end - q_beg : r_end - r_beg;
		ksprintf(&out, "%.3f", (double)reg->truesc / opt->a / longest);
		kputc('\n', &out);
	}
	s->sam = out.s;
}
/*
 * Formats one alignment as a single SAM text line (no trailing newline) and
 * returns it as a newly allocated string.
 *
 * header  when non-NULL, reference names are taken from it; otherwise the
 *         numeric reference ids are printed.
 * b       the alignment to format.
 * of      flag output style: BAM_OFDEC (decimal), BAM_OFHEX (hexadecimal);
 *         any other value prints the flag as characters taken from
 *         bam_flag2char_table.
 *
 * The returned pointer is the buffer of the internal kstring; this function
 * does not free it.
 */
char *bam_format1_core(const bam_header_t *header, const bam1_t *b, int of)
{
	/* CIGAR op characters indexed by op code. The op field is 4 bits wide,
	 * so the lookup must cover '=' and 'X' and be bounds-checked. */
	static const char cigar_op_chr[] = "MIDNSHP=XB";
	uint8_t *s = bam1_seq(b), *t = bam1_qual(b);
	int i;
	const bam1_core_t *c = &b->core;
	kstring_t str;
	str.l = str.m = 0; str.s = 0;

	/* QNAME */
	kputsn(bam1_qname(b), c->l_qname-1, &str); kputc('\t', &str);

	/* FLAG */
	if (of == BAM_OFDEC) { kputw(c->flag, &str); kputc('\t', &str); }
	else if (of == BAM_OFHEX) ksprintf(&str, "0x%x\t", c->flag);
	else { // BAM_OFSTR
		for (i = 0; i < 16; ++i)
			if ((c->flag & 1<<i) && bam_flag2char_table[i])
				kputc(bam_flag2char_table[i], &str);
		kputc('\t', &str);
	}

	/* RNAME */
	if (c->tid < 0) kputsn("*\t", 2, &str);
	else {
		if (header) kputs(header->target_name[c->tid] , &str);
		else kputw(c->tid, &str);
		kputc('\t', &str);
	}

	/* POS (1-based) and MAPQ */
	kputw(c->pos + 1, &str); kputc('\t', &str);
	kputw(c->qual, &str); kputc('\t', &str);

	/* CIGAR */
	if (c->n_cigar == 0) kputc('*', &str);
	else {
		for (i = 0; i < c->n_cigar; ++i) {
			const uint32_t op = bam1_cigar(b)[i]&BAM_CIGAR_MASK;
			kputw(bam1_cigar(b)[i]>>BAM_CIGAR_SHIFT, &str);
			kputc(op < sizeof(cigar_op_chr) - 1? cigar_op_chr[op] : '?', &str);
		}
	}
	kputc('\t', &str);

	/* RNEXT */
	if (c->mtid < 0) kputsn("*\t", 2, &str);
	else if (c->mtid == c->tid) kputsn("=\t", 2, &str);
	else {
		if (header) kputs(header->target_name[c->mtid], &str);
		else kputw(c->mtid, &str);
		kputc('\t', &str);
	}

	/* PNEXT (1-based) and TLEN */
	kputw(c->mpos + 1, &str); kputc('\t', &str);
	kputw(c->isize, &str); kputc('\t', &str);

	/* SEQ and QUAL */
	if (c->l_qseq) {
		for (i = 0; i < c->l_qseq; ++i) kputc(bam_nt16_rev_table[bam1_seqi(s, i)], &str);
		kputc('\t', &str);
		if (t[0] == 0xff) kputc('*', &str); /* first quality byte 0xff: print '*' */
		else for (i = 0; i < c->l_qseq; ++i) kputc(t[i] + 33, &str);
	} else kputsn("*\t*", 3, &str);

	/* auxiliary fields: two tag bytes, one type byte, then the payload */
	s = bam1_aux(b);
	while (s < b->data + b->data_len) {
		uint8_t type, key[2];
		key[0] = s[0]; key[1] = s[1];
		s += 2; type = *s; ++s;
		kputc('\t', &str); kputsn((char*)key, 2, &str); kputc(':', &str);
		if (type == 'A') { kputsn("A:", 2, &str); kputc(*s, &str); ++s; }
		else if (type == 'C') { kputsn("i:", 2, &str); kputw(*s, &str); ++s; }
		else if (type == 'c') { kputsn("i:", 2, &str); kputw(*(int8_t*)s, &str); ++s; }
		else if (type == 'S') { kputsn("i:", 2, &str); kputw(*(uint16_t*)s, &str); s += 2; }
		else if (type == 's') { kputsn("i:", 2, &str); kputw(*(int16_t*)s, &str); s += 2; }
		else if (type == 'I') { kputsn("i:", 2, &str); kputuw(*(uint32_t*)s, &str); s += 4; }
		else if (type == 'i') { kputsn("i:", 2, &str); kputw(*(int32_t*)s, &str); s += 4; }
		else if (type == 'f') { ksprintf(&str, "f:%g", *(float*)s); s += 4; }
		else if (type == 'd') { ksprintf(&str, "d:%lg", *(double*)s); s += 8; }
		else if (type == 'Z' || type == 'H') { kputc(type, &str); kputc(':', &str); while (*s) kputc(*s++, &str); ++s; }
		/* NOTE(review): any other type byte has no branch here, so its
		 * payload is not skipped and the rest of the line is unreliable. */
	}
	return str.s;
}
/*
 * Reads the header of a VCF/BCF file.
 *
 * Binary input is delegated to bcf_hdr_read(). For text input the leading
 * '#' lines are collected up to and including the first line that does not
 * start with "##". When fp->fn_aux is set, a "##contig=<ID=..,length=..>"
 * line is generated from each record of that file and inserted before that
 * line. After parsing, sequence names found in a tabix index of fp->fn that
 * are missing from the header are added as contigs with length -1.
 *
 * Returns the new header, or 0 when a non-header line appears before the
 * sample line, when fp->fn_aux cannot be opened, or when the input holds no
 * header text at all.
 */
bcf_hdr_t *vcf_hdr_read(htsFile *fp)
{
	kstring_t txt, *s;
	bcf_hdr_t *h;

	if (fp->is_bin) return bcf_hdr_read((BGZF*)fp->fp);

	s = &fp->line;
	h = bcf_hdr_init();
	txt.l = txt.m = 0; txt.s = 0;
	while (hts_getline(fp, KS_SEP_LINE, s) >= 0) {
		if (s->l == 0) continue;
		if (s->s[0] != '#') {
			if (hts_verbose >= 2)
				fprintf(stderr, "[E::%s] no sample line\n", __func__);
			free(txt.s);
			bcf_hdr_destroy(h);
			return 0;
		}
		if (s->s[1] != '#' && fp->fn_aux) { // insert contigs here
			int dret;
			gzFile f;
			kstream_t *ks;
			kstring_t tmp;
			tmp.l = tmp.m = 0; tmp.s = 0;
			f = gzopen(fp->fn_aux, "r");
			if (!f) { // do not hand a NULL stream to the reader
				if (hts_verbose >= 2)
					fprintf(stderr, "[E::%s] couldn't open '%s'\n", __func__, fp->fn_aux);
				free(txt.s);
				bcf_hdr_destroy(h);
				return 0;
			}
			ks = ks_init(f);
			while (ks_getuntil(ks, 0, &tmp, &dret) >= 0) {
				int c;
				kputs("##contig=<ID=", &txt); kputs(tmp.s, &txt);
				ks_getuntil(ks, 0, &tmp, &dret);
				kputs(",length=", &txt); kputw(atol(tmp.s), &txt);
				kputsn(">\n", 2, &txt);
				if (dret != '\n')
					while ((c = ks_getc(ks)) != '\n' && c != -1); // skip the rest of the line
			}
			free(tmp.s);
			ks_destroy(ks);
			gzclose(f);
		}
		kputsn(s->s, s->l, &txt);
		if (s->s[1] != '#') break;
		kputc('\n', &txt);
	}
	if (!txt.s) { // nothing was read: there is no text to parse
		if (hts_verbose >= 2)
			fprintf(stderr, "[E::%s] no header lines found\n", __func__);
		bcf_hdr_destroy(h);
		return 0;
	}
	h->l_text = txt.l + 1; // including NULL
	h->text = txt.s;
	bcf_hdr_parse(h);

	// check tabix index, are all contigs listed in the header? add the missing ones
	tbx_t *idx = tbx_index_load(fp->fn);
	if ( idx )
	{
		int i, n, need_sync = 0;
		const char **names = tbx_seqnames(idx, &n);
		for (i=0; i<n; i++)
		{
			bcf_hrec_t *hrec = bcf_hdr_get_hrec(h, BCF_DT_CTG, (char*) names[i]);
			if ( hrec ) continue;
			hrec = (bcf_hrec_t*) calloc(1,sizeof(bcf_hrec_t));
			hrec->key = strdup("contig");
			bcf_hrec_add_key(hrec, "ID", strlen("ID"));
			bcf_hrec_set_val(hrec, hrec->nkeys-1, (char*) names[i], strlen(names[i]), 0);
			bcf_hrec_add_key(hrec, "length", strlen("length"));
			bcf_hrec_set_val(hrec, hrec->nkeys-1, "-1", strlen("-1"), 0); // what is a good default value?
			bcf_hdr_add_hrec(h, hrec);
			need_sync = 1;
		}
		free(names);
		tbx_destroy(idx);
		if ( need_sync )
		{
			bcf_hdr_sync(h);
			bcf_hdr_fmt_text(h);
		}
	}
	return h;
}
/*
 * Walks the CIGAR of b against ref to build an MD-style string and a
 * mismatch count (nm), then updates the record according to flag.
 *
 * b        record to inspect and modify in place
 * ref      reference bases, indexed by reference coordinate; scanning stops
 *          at ref_len or at the first '\0'
 * ref_len  number of usable bases in ref
 * flag     bit set of USE_EQUAL, UPDATE_NM, UPDATE_MD, DROP_TAG, BIN_QUAL
 * max_nm   when > 0 and nm >= max_nm, the second pass below is run
 *
 * Messages about changed NM/MD values are printed to stderr.
 */
void bam_fillmd1_core(bam1_t *b, char *ref, int ref_len, int flag, int max_nm)
{
	uint8_t *seq = bam_get_seq(b);
	uint32_t *cigar = bam_get_cigar(b);
	bam1_core_t *c = &b->core;
	/* x: reference coordinate, y: query offset, u: current run of matches */
	int i, x, y, u = 0;
	kstring_t *str;
	int32_t old_nm_i = -1, nm = 0;

	str = (kstring_t*)calloc(1, sizeof(kstring_t));
	/* first pass: build the MD text in str and count nm */
	for (i = y = 0, x = c->pos; i < c->n_cigar; ++i) {
		/* low 4 bits hold the operation, the remaining bits its length */
		int j, l = cigar[i]>>4, op = cigar[i]&0xf;
		if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) {
			for (j = 0; j < l; ++j) {
				int c1, c2, z = y + j;
				if (x+j >= ref_len || ref[x+j] == '\0') break; // out of bounds
				c1 = bam_seqi(seq, z), c2 = seq_nt16_table[(int)ref[x+j]];
				if ((c1 == c2 && c1 != 15 && c2 != 15) || c1 == 0) { // a match
					/* clears the 4-bit code of base z (bases are packed two per byte) */
					if (flag&USE_EQUAL) seq[z/2] &= (z&1)? 0xf0 : 0x0f;
					++u;
				} else {
					/* mismatch: flush the match run, then the reference base */
					kputw(u, str); kputc(ref[x+j], str);
					u = 0; ++nm;
				}
			}
			if (j < l) break; /* ran off the reference: stop processing the CIGAR */
			x += l; y += l;
		} else if (op == BAM_CDEL) {
			/* deletion: "<run>^<deleted reference bases>" */
			kputw(u, str); kputc('^', str);
			for (j = 0; j < l; ++j) {
				if (x+j >= ref_len || ref[x+j] == '\0') break;
				kputc(ref[x+j], str);
			}
			u = 0;
			x += j; nm += j;
			if (j < l) break;
		} else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) {
			y += l;
			if (op == BAM_CINS) nm += l; /* only insertions count toward nm */
		} else if (op == BAM_CREF_SKIP) {
			x += l;
		}
	}
	kputw(u, str); /* trailing match run */
	// apply max_nm
	if (max_nm > 0 && nm >= max_nm) {
		/* second pass: for every matching position set the base code to 15
		 * and its quality to 0 */
		for (i = y = 0, x = c->pos; i < c->n_cigar; ++i) {
			int j, l = cigar[i]>>4, op = cigar[i]&0xf;
			if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) {
				for (j = 0; j < l; ++j) {
					int c1, c2, z = y + j;
					if (x+j >= ref_len || ref[x+j] == '\0') break; // out of bounds
					c1 = bam_seqi(seq, z), c2 = seq_nt16_table[(int)ref[x+j]];
					if ((c1 == c2 && c1 != 15 && c2 != 15) || c1 == 0) { // a match
						seq[z/2] |= (z&1)? 0x0f : 0xf0;
						bam_get_qual(b)[z] = 0;
					}
				}
				if (j < l) break;
				x += l; y += l;
			} else if (op == BAM_CDEL || op == BAM_CREF_SKIP) x += l;
			else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) y += l;
		}
	}
	// update NM
	if ((flag & UPDATE_NM) && !(c->flag & BAM_FUNMAP)) {
		uint8_t *old_nm = bam_aux_get(b, "NM");
		if (old_nm) old_nm_i = bam_aux2i(old_nm);
		if (!old_nm) bam_aux_append(b, "NM", 'i', 4, (uint8_t*)&nm);
		else if (nm != old_nm_i) {
			/* replace the stored value and report the change */
			fprintf(stderr, "[bam_fillmd1] different NM for read '%s': %d -> %d\n", bam_get_qname(b), old_nm_i, nm);
			bam_aux_del(b, old_nm);
			bam_aux_append(b, "NM", 'i', 4, (uint8_t*)&nm);
		}
	}
	// update MD
	if ((flag & UPDATE_MD) && !(c->flag & BAM_FUNMAP)) {
		uint8_t *old_md = bam_aux_get(b, "MD");
		if (!old_md) bam_aux_append(b, "MD", 'Z', str->l + 1, (uint8_t*)str->s);
		else {
			/* old_md+1 skips the type byte; the comparison ignores case */
			int is_diff = 0;
			if (strlen((char*)old_md+1) == str->l) {
				for (i = 0; i < str->l; ++i)
					if (toupper(old_md[i+1]) != toupper(str->s[i]))
						break;
				if (i < str->l) is_diff = 1;
			} else is_diff = 1;
			if (is_diff) {
				fprintf(stderr, "[bam_fillmd1] different MD for read '%s': '%s' -> '%s'\n", bam_get_qname(b), old_md+1, str->s);
				bam_aux_del(b, old_md);
				bam_aux_append(b, "MD", 'Z', str->l + 1, (uint8_t*)str->s);
			}
		}
	}
	// drop all tags but RG
	if (flag&DROP_TAG) {
		uint8_t *q = bam_aux_get(b, "RG");
		bam_aux_drop_other(b, q);
	}
	// reduce the resolution of base quality
	if (flag&BIN_QUAL) {
		uint8_t *qual = bam_get_qual(b);
		/* values >= 3 become <tens digit>*10 + 7; smaller values are kept */
		for (i = 0; i < b->core.l_qseq; ++i)
			if (qual[i] >= 3) qual[i] = qual[i]/10*10 + 7;
	}
	free(str->s); free(str);
}
/*
 * Sets up the genotype-likelihood module.
 *
 * outfiles   prefix used for the output file when doGlf is set
 * arguments  parsed command line; with exactly two arguments and "-GL" the
 *            option help is printed to stdout and the process exits
 * inputtype  not used in this constructor
 *
 * Depending on GL the matching likelihood model is initialised. When doGlf
 * is set the output file is opened; for doGlf==2 the beagle header line is
 * written straight away.
 */
likeClass::likeClass(const char *outfiles,argStruct *arguments,int inputtype){
  postfix = ".glf.gz";
  beaglepostfix = ".beagle.gz";

  trim = 0;
  GL = 0;
  doGlf = 0;
  errorFname = NULL;
  errorProbs = NULL;
  minQ = MINQ;//<- general.h
  minInd = 0;
  angsd_tmpdir = strdup("angsd_tmpdir");

  // Set the output state before any early return so the object never
  // carries an indeterminate file handle or buffer.
  gzoutfile = Z_NULL;
  bufstr.s = NULL;
  bufstr.l = bufstr.m = 0;// <- used for buffered output

  if(arguments->argc==2){
    if(!strcmp(arguments->argv[1],"-GL")){
      printArg(stdout);
      exit(0);
    }else
      return;
  }

  getOptions(arguments);
  printArg(arguments->argumentFile);

  if(GL==1)
    bam_likes_init();
  else if(GL==2)
    gatk_init();
  else if(GL==3){
    soap.init(arguments->nInd,angsd_tmpdir);
    if(soap.doRecal)
      fprintf(stderr,"[%s] Will calculate recalibration matrices, please don't do any other analysis\n",__FILE__);
    else
      fprintf(stderr,"[%s] Will use precalculated calibration matrices\n",__FILE__);
  }else if(GL==4) {
    //default errormatrix
    double errorsDefault[4][4]={{0       ,0.00031 , 0.00373 , 0.000664},
                                {0.000737,   0    , 0.000576, 0.001702},
                                {0.001825,0.000386,    0    , 0.000653},
                                {0.00066 ,0.003648, 0.000321,    0    },
    };
    //allocate and plug in default values
    errors = new double *[4];
    for(int i=0;i<4;i++){
      errors[i] = new double[4];
      for(int j=0;j<4;j++)
        errors[i][j] = errorsDefault[i][j];
    }
    // a user supplied file replaces the defaults
    if(errorFname!=NULL)
      readError(errors,errorFname);
    errorProbs = error::generateErrorPointers(errors,3,4);
  }

  if(doGlf){
    if(doGlf!=2)
      gzoutfile = openFileGz(outfiles,postfix,GZOPT);
    else{
      gzoutfile = openFileGz(outfiles,beaglepostfix,GZOPT);
      // beagle header: three columns per individual
      kputs("marker\tallele1\tallele2",&bufstr);
      for(int i=0;i<arguments->nInd;i++){
        kputs("\tInd",&bufstr);
        kputw(i,&bufstr);
        kputs("\tInd",&bufstr);
        kputw(i,&bufstr);
        kputs("\tInd",&bufstr);
        kputw(i,&bufstr);
      }
      kputc('\n',&bufstr);
      gzwrite(gzoutfile,bufstr.s,bufstr.l);
    }
  }
}
/*
 * Rebuilds the sorting state in srt for the records of all active readers
 * that sit at (chr, min_pos), then pairs them up.
 *
 * Steps, in order:
 *   1. one-time initialisation of the pairing logic, scores and hashes;
 *   2. reset of both hashes (their keys are freed) and of the counters;
 *   3. per active reader: build a text key per record, register it as a
 *      variant, and assign the reader to a group keyed by its variants;
 *   4. per variant: bitmask of the groups it occurs in;
 *   5. one initial variant set per variant;
 *   6. zeroed pairing matrix and per-set counts;
 *   7. repeated merging of the best-scoring compatible sets until every
 *      set has been pushed with push_vset().
 * Finally srt->chr and srt->pos are set to the arguments.
 */
static void bcf_sr_sort_set(bcf_srs_t *readers, sr_sort_t *srt, const char *chr, int min_pos)
{
    if ( !srt->grp_str2int )
    {
        // first time here, initialize
        if ( !srt->pair )
        {
            if ( readers->collapse==COLLAPSE_NONE ) readers->collapse = BCF_SR_PAIR_EXACT;
            bcf_sr_set_opt(readers, BCF_SR_PAIR_LOGIC, readers->collapse);
        }
        bcf_sr_init_scores(srt);
        srt->grp_str2int = khash_str2int_init();
        srt->var_str2int = khash_str2int_init();
    }
    // the hash keys were allocated by this function on an earlier call; free them before clearing
    int k;
    khash_t(str2int) *hash;
    hash = srt->grp_str2int;
    for (k=0; k < kh_end(hash); k++)
        if ( kh_exist(hash,k) ) free((char*)kh_key(hash,k));
    hash = srt->var_str2int;
    for (k=0; k < kh_end(hash); k++)
        if ( kh_exist(hash,k) ) free((char*)kh_key(hash,k));
    kh_clear(str2int, srt->grp_str2int);
    kh_clear(str2int, srt->var_str2int);
    srt->ngrp = srt->nvar = srt->nvset = 0;

    grp_t grp;
    memset(&grp,0,sizeof(grp_t));

    // group VCFs into groups, each with a unique combination of variants in the duplicate lines
    int ireader,ivar,irec,igrp,ivset,iact;
    for (ireader=0; ireader<readers->nreaders; ireader++) srt->vcf_buf[ireader].nrec = 0;
    for (iact=0; iact<srt->nactive; iact++)
    {
        ireader = srt->active[iact];
        bcf_sr_t *reader = &readers->readers[ireader];
        int rid = bcf_hdr_name2id(reader->header, chr);
        grp.nvar = 0;
        hts_expand(int,reader->nbuffer,srt->moff,srt->off);
        srt->noff = 0;
        srt->str.l = 0;
        // NOTE(review): iterates buffer[1..nbuffer]; confirm this matches the reader's buffer indexing
        for (irec=1; irec<=reader->nbuffer; irec++)
        {
            bcf1_t *line = reader->buffer[irec];
            if ( line->rid!=rid || line->pos!=min_pos ) break;

            // append this record's key to srt->str: "REF>ALT1,REF>ALT2,..." or "REF>." when there is no ALT
            if ( srt->str.l ) kputc(';',&srt->str);
            srt->off[srt->noff++] = srt->str.l;
            size_t beg = srt->str.l;
            for (ivar=1; ivar<line->n_allele; ivar++)
            {
                if ( ivar>1 ) kputc(',',&srt->str);
                kputs(line->d.allele[0],&srt->str);
                kputc('>',&srt->str);
                kputs(line->d.allele[ivar],&srt->str);
            }
            if ( line->n_allele==1 )
            {
                kputs(line->d.allele[0],&srt->str);
                kputsn(">.",2,&srt->str);
            }

            // Create new variant or attach to existing one. But careful, there can be duplicate
            // records with the same POS,REF,ALT (e.g. in dbSNP-b142)
            char *var_str = beg + srt->str.s;
            int ret, var_idx = 0, var_end = srt->str.l;
            while ( 1 )
            {
                ret = khash_str2int_get(srt->var_str2int, var_str, &ivar);
                if ( ret==-1 ) break;   // key not seen yet

                var_t *var = &srt->var[ivar];
                if ( var->vcf[var->nvcf-1] != ireader ) break;  // known key, last added by a different reader

                // same reader already holds this key: retry with a numeric suffix appended
                srt->str.l = var_end;
                kputw(var_idx, &srt->str);
                var_str = beg + srt->str.s;     // recomputed after the append
                var_idx++;
            }
            if ( ret==-1 )
            {
                ivar = srt->nvar++;
                hts_expand0(var_t,srt->nvar,srt->mvar,srt->var);
                srt->var[ivar].nvcf = 0;
                khash_str2int_set(srt->var_str2int, strdup(var_str), ivar);
                free(srt->var[ivar].str);   // possible left-over from the previous position
            }
            var_t *var = &srt->var[ivar];
            var->nalt = line->n_allele - 1;
            var->type = bcf_get_variant_types(line);
            srt->str.s[var_end] = 0;    // cut the duplicate suffix off again before copying the name
            if ( ret==-1 )
                var->str = strdup(var_str);

            int mvcf = var->mvcf;
            var->nvcf++;
            // NOTE(review): the type argument is `int*`; confirm it matches the element type of var->vcf
            hts_expand0(int*, var->nvcf, var->mvcf, var->vcf);
            if ( mvcf != var->mvcf ) var->rec = (bcf1_t **) realloc(var->rec,sizeof(bcf1_t*)*var->mvcf);
            var->vcf[var->nvcf-1] = ireader;
            var->rec[var->nvcf-1] = line;

            grp.nvar++;
            // NOTE(review): grp.var receives variant indices below; confirm var_t is the intended element type
            hts_expand(var_t,grp.nvar,grp.mvar,grp.var);
            grp.var[grp.nvar-1] = ivar;
        }
        char *grp_key = grp_create_key(srt);
        int ret = khash_str2int_get(srt->grp_str2int, grp_key, &igrp);
        if ( ret==-1 )
        {
            // new group: it takes over grp (and its var array) and the key
            igrp = srt->ngrp++;
            hts_expand0(grp_t, srt->ngrp, srt->mgrp, srt->grp);
            free(srt->grp[igrp].var);
            srt->grp[igrp] = grp;
            srt->grp[igrp].key = grp_key;
            khash_str2int_set(srt->grp_str2int, grp_key, igrp);
            memset(&grp,0,sizeof(grp_t));
        }
        else
            free(grp_key);
        srt->grp[igrp].nvcf++;
    }
    free(grp.var);

    // initialize bitmask - which groups is the variant present in
    for (ivar=0; ivar<srt->nvar; ivar++)
    {
        srt->var[ivar].mask = kbs_resize(srt->var[ivar].mask, srt->ngrp);
        kbs_clear(srt->var[ivar].mask);
    }
    for (igrp=0; igrp<srt->ngrp; igrp++)
    {
        for (ivar=0; ivar<srt->grp[igrp].nvar; ivar++)
        {
            int i = srt->grp[igrp].var[ivar];
            kbs_insert(srt->var[i].mask, igrp);
        }
    }

    // create the initial list of variant sets
    for (ivar=0; ivar<srt->nvar; ivar++)
    {
        ivset = srt->nvset++;
        hts_expand0(varset_t, srt->nvset, srt->mvset, srt->vset);

        varset_t *vset = &srt->vset[ivset];
        vset->nvar = 1;
        hts_expand0(var_t, vset->nvar, vset->mvar, vset->var);
        vset->var[vset->nvar-1] = ivar;
        var_t *var = &srt->var[ivar];
        vset->cnt = var->nvcf;
        vset->mask = kbs_resize(vset->mask, srt->ngrp);
        kbs_clear(vset->mask);
        kbs_bitwise_or(vset->mask, var->mask);

        // translate the VCF_* type bits into SR_* bits; MNPs are folded into SR_SNP
        int type = 0;
        if ( var->type==VCF_REF ) type |= SR_REF;
        else
        {
            if ( var->type & VCF_SNP ) type |= SR_SNP;
            if ( var->type & VCF_MNP ) type |= SR_SNP;
            if ( var->type & VCF_INDEL ) type |= SR_INDEL;
            if ( var->type & VCF_OTHER ) type |= SR_OTHER;
        }
        var->type = type;
    }
#if DEBUG_VSETS
    debug_vsets(srt);
#endif

    // initialize the pairing matrix
    hts_expand(int, srt->ngrp*srt->nvset, srt->mpmat, srt->pmat);
    hts_expand(int, srt->nvset, srt->mcnt, srt->cnt);
    memset(srt->pmat, 0, sizeof(*srt->pmat)*srt->ngrp*srt->nvset);
    for (ivset=0; ivset<srt->nvset; ivset++)
    {
        varset_t *vset = &srt->vset[ivset];
        for (igrp=0; igrp<srt->ngrp; igrp++) srt->pmat[ivset*srt->ngrp+igrp] = 0;
        srt->cnt[ivset] = vset->cnt;
    }

    // pair the lines
    while ( srt->nvset )
    {
#if DEBUG_VSETS
        fprintf(stderr,"\n");
        debug_vsets(srt);
#endif

        // pick the set with the highest count; ties keep the lowest index
        int imax = 0;
        for (ivset=1; ivset<srt->nvset; ivset++)
            if ( srt->cnt[imax] < srt->cnt[ivset] ) imax = ivset;

        int ipair = -1;
        uint32_t max_score = 0;
        for (ivset=0; ivset<srt->nvset; ivset++)
        {
            if ( kbs_logical_and(srt->vset[imax].mask,srt->vset[ivset].mask) ) continue;   // cannot be merged
            uint32_t score = pairing_score(srt, imax, ivset);
            // fprintf(stderr,"score: %d %d, logic=%d \t..\t %u\n", imax,ivset,srt->pair,score);
            if ( max_score < score ) { max_score = score; ipair = ivset; }
        }

        // merge rows creating a new variant set this way
        if ( ipair!=-1 && ipair!=imax )
        {
            imax = merge_vsets(srt, imax, ipair);
            continue;
        }

        // no partner found: the set is final
        push_vset(srt, imax);
    }

    srt->chr = chr;
    srt->pos = min_pos;
}
void printReadBuffered(aRead &rd,aHead *hd,kstring_t &str) { str.l = 0; if(bam_validate1(hd,rd)==0){ fprintf(stderr,"problems validateing\n"); exit(0); } kputsn((char *)rd.vDat,rd.l_qname-1,&str);kputc('\t', &str); kputw((int)rd.flag_nc>>16, &str); kputc('\t', &str); if(rd.refID==-1)//unmatched read kputc('*', &str); else kputs(hd->name[rd.refID] , &str); kputc('\t', &str); kputw(rd.pos+1, &str); kputc('\t', &str); kputw(rd.mapQ, &str);kputc('\t', &str); int nCigs = rd.nCig; if(nCigs==0) kputc('*', &str);// if no cigars else{ for (int i = 0; i < nCigs; ++i) {//print cigars uint32_t *cigs =getCig(&rd); kputw(cigs[i]>>BAM_CIGAR_SHIFT, &str); kputc("MIDNSHP"[cigs[i]&BAM_CIGAR_MASK], &str); } } kputc('\t', &str); if(rd.next_refID==-1) kputc('*', &str);// if no cigars else if(rd.refID==rd.next_refID) kputc('=', &str); else kputs(hd->name[rd.next_refID] , &str); kputc('\t', &str); kputw(rd.next_pos+1, &str); kputc('\t', &str); kputw(rd.tlen, &str); kputc('\t', &str); //start seq char *seq = (char *)getSeq(&rd); for(int i=0;i<rd.l_seq;i++) kputc(bam_nt16_rev_table2[bam1_seqi(seq, i)], &str); kputc('\t', &str); char *quals =(char *)getQuals(&rd); for(int i=0;i<rd.l_seq;i++) kputc(quals[i]+33, &str); //below is taken directly from samtools,(not to steal, to preserve ordering etc, all credits go where credit is due) //from aux start to the last memadrs in chunk printAuxBuffered(getAuxStart(&rd),rd.vDat+rd.block_size,str); kputc('\n', &str); }
/*
 * Writes the genotype likelihoods of the kept sites in the format selected
 * by doGlf:
 *   1  raw binary: 10 doubles per individual per site
 *   2  beagle text: marker, major, minor, then three normalised values per
 *      individual taken from exp() of the lh3 log values
 *   3  binary major/minor triplets to gzoutfile, plus a text line with
 *      chr, pos, major and minor to gzoutfile2
 *   4  text: chr, pos, then all 10*nInd likelihoods
 * Any other value writes nothing.
 */
void abcGL::printLike(funkyPars *pars) {
  assert(pars->likes!=NULL);

  switch(doGlf){
  case 1: {
    //glffinn format
    for(int site=0;site<pars->numSites;site++){
      if(pars->keepSites[site]==0)
        continue;
      aio::bgzf_write(gzoutfile,pars->likes[site],sizeof(double)*10*pars->nInd);
    }
    break;
  }
  case 2: {
    //beagle format
    bufstr.l = 0; //set tmpbuf beginning to zero
    for(int site=0;site<pars->numSites;site++) {
      lh3struct *lh3 = (lh3struct*) pars->extras[index+1];
      if(pars->keepSites[site]==0||lh3->hasAlloced[site]==0)
        continue;

      // marker name is <chr>_<pos>
      kputs(header->target_name[pars->refId],&bufstr);
      kputc('_',&bufstr);
      kputw(pars->posi[site]+1,&bufstr);
      kputc('\t',&bufstr);
      kputw(pars->major[site],&bufstr);
      kputc('\t',&bufstr);
      kputw(pars->minor[site],&bufstr);

      int major = pars->major[site];
      int minor = pars->minor[site];
      assert(major!=4&&minor!=4);

      for(int ind=0;ind<pars->nInd;ind++) {
        double val[3];
        for(int g=0;g<3;g++)
          val[g] = exp(lh3->lh3[site][ind*3+g]);
        angsd::norm(val,3);
        for(int g=0;g<3;g++)
          ksprintf(&bufstr, "\t%f",val[g]);
      }
      if(bufstr.l!=0)
        kputc('\n',&bufstr);
    }
    aio::bgzf_write(gzoutfile,bufstr.s,bufstr.l);
    bufstr.l=0;
    break;
  }
  case 3: {
    //FGV v0.208 Aug,28
    for(int site=0;site<pars->numSites;site++) {
      if(pars->keepSites[site]==0) //TSK 0.441 sep 25
        continue;
      char major = pars->major[site];
      char minor = pars->minor[site] ;
      assert(major!=4&&minor!=4);

      // major/major, major/minor and minor/minor likelihoods per individual
      for(int ind=0;ind<pars->nInd;ind++) {
        double dump[3];
        dump[0] = pars->likes[site][ind*10+angsd::majorminor[major][major]] ;
        dump[1] = pars->likes[site][ind*10+angsd::majorminor[major][minor]] ;
        dump[2] = pars->likes[site][ind*10+angsd::majorminor[minor][minor]] ;
        aio::bgzf_write(gzoutfile,dump,3*sizeof(double));
      }

      // companion text line with the site description
      bufstr.l=0;
      ksprintf(&bufstr,"%s\t%d\t",header->target_name[pars->refId],pars->posi[site]+1);
      ksprintf(&bufstr,"%c\t%c\n",intToRef[major],intToRef[minor]);
      aio::bgzf_write(gzoutfile2,bufstr.s,bufstr.l);
      bufstr.l=0;
    }
    break;
  }
  case 4: {
    bufstr.l=0;
    //otherwise print textoutput
    for(int site=0;site<pars->numSites;site++){
      if(pars->keepSites[site]==0)
        continue;
      kputs(header->target_name[pars->refId],&bufstr);
      kputc('\t',&bufstr);
      kputw(pars->posi[site]+1,&bufstr);
      for(int col=0;col<10*pars->nInd;col++)
        ksprintf(&bufstr, "\t%f",pars->likes[site][col]);
      kputc('\n',&bufstr);
    }
    aio::bgzf_write(gzoutfile,bufstr.s,bufstr.l);
    bufstr.l=0;
    break;
  }
  default:
    break;
  }
}
/*
 * Walks the synchronized readers in args->files and outputs the sites selected
 * by args->isec_op:
 *   OP_COMPLEMENT  only the first reader has the site
 *   OP_EQUAL       exactly args->isec_n readers have the site
 *   OP_PLUS        at least args->isec_n readers have the site
 *   OP_MINUS       at most args->isec_n readers have the site
 * Any other operation (e.g. OP_VENN) keeps every site.
 *
 * Output goes to one of:
 *   - standard output as VCF/BCF, when a single file is to be written
 *     (args->nwrite==1, or a targets list is used with one reader);
 *   - args->fh_sites as a text line "chrom, pos+1, ref, alt[,alt...], presence
 *     flags" (one '0'/'1' per reader), when that handle is set;
 *   - the per-reader handles in args->fh_out, when args->prefix is set.
 *
 * The process is terminated with an error message when standard output cannot
 * be opened for writing.
 */
void isec_vcf(args_t *args)
{
    bcf_srs_t *files = args->files;
    kstring_t str = {0,0,0};
    htsFile *out_fh = NULL;

    // When only one VCF is output, print VCF to stdout
    int out_std = 0;
    if ( args->nwrite==1 ) out_std = 1;
    if ( args->targets_list && files->nreaders==1 ) out_std = 1;
    if ( out_std )
    {
        out_fh = hts_open("-",hts_bcf_wmode(args->output_type));
        if ( !out_fh )
        {
            // A NULL handle would otherwise be dereferenced by bcf_hdr_write() below
            fprintf(stderr,"Can't write to standard output\n");
            exit(1);
        }
        bcf_hdr_append_version(files->readers[args->iwrite].header,args->argc,args->argv,"bcftools_isec");
        bcf_hdr_write(out_fh, files->readers[args->iwrite].header);
    }
    if ( !args->nwrite && !out_std && !args->prefix )
        fprintf(stderr,"Note: -w option not given, printing list of sites...\n");

    int n;
    while ( (n=bcf_sr_next_line(files)) )
    {
        bcf_sr_t *reader = NULL;
        bcf1_t *line = NULL;
        int i, ret = 0;

        // Remember the first reader holding this site and build the presence mask
        for (i=0; i<files->nreaders; i++)
        {
            if ( !bcf_sr_has_line(files,i) ) continue;
            if ( !line )
            {
                line = files->readers[i].buffer[0];
                reader = &files->readers[i];
            }
            ret |= 1<<i;    // this may overflow for many files, but will be used only with two (OP_VENN)
        }

        switch (args->isec_op)
        {
            case OP_COMPLEMENT: if ( n!=1 || !bcf_sr_has_line(files,0) ) continue; break;
            case OP_EQUAL: if ( n != args->isec_n ) continue; break;
            case OP_PLUS: if ( n < args->isec_n ) continue; break;
            case OP_MINUS: if ( n > args->isec_n ) continue; break;
        }

        if ( out_std )
        {
            if ( bcf_sr_has_line(files,args->iwrite) )
                bcf_write1(out_fh, files->readers[args->iwrite].header, files->readers[args->iwrite].buffer[0]);
            continue;
        }
        else if ( args->fh_sites )
        {
            str.l = 0;
            kputs(reader->header->id[BCF_DT_CTG][line->rid].key, &str); kputc('\t', &str);
            kputw(line->pos+1, &str); kputc('\t', &str);
            if (line->n_allele > 0) kputs(line->d.allele[0], &str);
            else kputc('.', &str);
            kputc('\t', &str);
            if (line->n_allele > 1) kputs(line->d.allele[1], &str);
            else kputc('.', &str);
            for (i=2; i<line->n_allele; i++)
            {
                kputc(',', &str);
                kputs(line->d.allele[i], &str);
            }
            kputc('\t', &str);
            for (i=0; i<files->nreaders; i++)
                kputc(bcf_sr_has_line(files,i)?'1':'0', &str);
            kputc('\n', &str);
            fwrite(str.s,sizeof(char),str.l,args->fh_sites);
        }

        if ( args->prefix )
        {
            if ( args->isec_op==OP_VENN )
                bcf_write1(args->fh_out[ret-1], reader->header, line);  // mask 1,2,3 -> handle 0,1,2
            else
            {
                for (i=0; i<files->nreaders; i++)
                {
                    if ( !bcf_sr_has_line(files,i) ) continue;
                    if ( args->write && !args->write[i] ) continue;
                    bcf_write1(args->fh_out[i], files->readers[i].header, files->readers[i].buffer[0]);
                }
            }
        }
    }
    if ( str.s ) free(str.s);
    if ( out_fh ) hts_close(out_fh);
}
/*
 * Writes the genotype likelihoods stored in pars to gzoutfile using the layout
 * selected by doGlf:
 *   1 - binary dump of pars->likes, 10 doubles per individual for every kept site
 *   2 - beagle text format: the major/major, major/minor and minor/minor
 *       likelihoods are exponentiated and scaled by their sum
 *   3 - binary dump of the same three likelihoods, untransformed
 *   4 - text output with all 10 likelihoods per individual
 * Sites whose keepSites entry is zero are skipped in every mode; any other doGlf
 * value writes nothing.
 */
void likeClass::printLike(funkyPars *pars) {
  assert(pars->likes!=NULL);

  switch(doGlf){
  case 1: {
    // raw binary: the full likelihood array of every kept site
    for(int site=0;site<pars->numSites;site++){
      if(pars->keepSites[site]!=0)
        gzwrite(gzoutfile,pars->likes[site],sizeof(double)*10*pars->nInd);
    }
    break;
  }

  case 2: {
    // beagle format; all lines are collected in bufstr and written once at the end
    bufstr.l = 0;
    for(int site=0;site<pars->numSites;site++){
      if(pars->keepSites[site]==0)
        continue;

      const int majorAllele = pars->major[site];
      const int minorAllele = pars->minor[site];

      // marker id "<name>_<posi+1>", followed by the major and minor codes
      kputs(header->name[pars->refId],&bufstr);
      kputc('_',&bufstr);
      kputw(pars->posi[site]+1,&bufstr);
      kputc('\t',&bufstr);
      kputw(majorAllele,&bufstr);
      kputc('\t',&bufstr);
      kputw(minorAllele,&bufstr);

      assert(majorAllele!=4&&minorAllele!=4);

      for(int ind=0;ind<pars->nInd;ind++){
        const int first = ind*10;
        const double eMajMaj = exp(pars->likes[site][first+angsd::majorminor[majorAllele][majorAllele]]);
        const double eMajMin = exp(pars->likes[site][first+angsd::majorminor[majorAllele][minorAllele]]);
        const double eMinMin = exp(pars->likes[site][first+angsd::majorminor[minorAllele][minorAllele]]);
        const double total = eMajMaj+eMajMin+eMinMin;
        ksprintf(&bufstr, "\t%f",eMajMaj/total);
        ksprintf(&bufstr, "\t%f",eMajMin/total);
        ksprintf(&bufstr, "\t%f",eMinMin/total);
      }
      kputc('\n',&bufstr);
    }
    gzwrite(gzoutfile,bufstr.s,bufstr.l);
    break;
  }

  case 3: {
    for(int site=0;site<pars->numSites;site++){
      if(pars->keepSites[site]==0)
        continue;

      const int majorAllele = pars->major[site];
      const int minorAllele = pars->minor[site];
      assert(majorAllele!=4&&minorAllele!=4);

      // three likelihoods per individual, written as raw doubles
      for(int ind=0;ind<pars->nInd;ind++){
        const int first = ind*10;
        double triplet[3];
        triplet[0] = pars->likes[site][first+angsd::majorminor[majorAllele][majorAllele]];
        triplet[1] = pars->likes[site][first+angsd::majorminor[majorAllele][minorAllele]];
        triplet[2] = pars->likes[site][first+angsd::majorminor[minorAllele][minorAllele]];
        gzwrite(gzoutfile,triplet,3*sizeof(double));
      }
    }
    break;
  }

  case 4: {
    // plain text: name, posi+1 and every likelihood value of the site
    bufstr.l=0;
    for(int site=0;site<pars->numSites;site++){
      if(pars->keepSites[site]==0)
        continue;
      kputs(header->name[pars->refId],&bufstr);
      kputc('\t',&bufstr);
      kputw(pars->posi[site]+1,&bufstr);
      const int nValues = 10*pars->nInd;
      for(int k=0;k<nValues;k++)
        ksprintf(&bufstr, "\t%f",pars->likes[site][k]);
      kputc('\n',&bufstr);
    }
    gzwrite(gzoutfile,bufstr.s,bufstr.l);
    break;
  }

  default:
    break;
  }
}
/*
 * Walks the synchronized readers in args->files, applies the optional per-file
 * filters and outputs the sites selected by args->isec_op:
 *   OP_COMPLEMENT  only the first reader has the site
 *   OP_EQUAL       exactly args->isec_n readers have the site
 *   OP_PLUS        at least args->isec_n readers have the site
 *   OP_MINUS       at most args->isec_n readers have the site
 *   OP_EXACT       the presence pattern equals args->isec_exact
 * Any other operation (e.g. OP_VENN) keeps every site.
 *
 * Output goes to one of:
 *   - args->output_fname (or standard output) as VCF/BCF, when a single file is
 *     to be written and no prefix is given, or a targets list is used with one
 *     reader;
 *   - args->fh_sites as a text line "chrom, pos+1, ref, alt[,alt...], presence
 *     flags" (one '0'/'1' per reader), when that handle is set;
 *   - the handles in args->fh_out, when args->prefix is set.
 *
 * Every open, write and close is checked; a failure is reported through
 * error() so that truncated output cannot go unnoticed.
 */
void isec_vcf(args_t *args)
{
    bcf_srs_t *files = args->files;
    kstring_t str = {0,0,0};
    htsFile *out_fh = NULL;
    const char *out_name = args->output_fname ? args->output_fname : "standard output";

    // When only one VCF is output, print VCF to pysam_stdout or -o file
    int out_std = 0;
    if ( args->nwrite==1 && !args->prefix ) out_std = 1;
    if ( args->targets_list && files->nreaders==1 ) out_std = 1;
    if ( out_std )
    {
        out_fh = hts_open(args->output_fname? args->output_fname : "-",hts_bcf_wmode(args->output_type));
        if ( out_fh == NULL ) error("Can't write to %s: %s\n", out_name, strerror(errno));
        if ( args->n_threads ) hts_set_threads(out_fh, args->n_threads);
        if (args->record_cmd_line) bcf_hdr_append_version(files->readers[args->iwrite].header,args->argc,args->argv,"bcftools_isec");
        if ( bcf_hdr_write(out_fh, files->readers[args->iwrite].header)!=0 )
            error("[%s] Error: cannot write to %s\n", __func__, out_name);
    }
    if ( !args->nwrite && !out_std && !args->prefix )
        fprintf(pysam_stderr,"Note: -w option not given, printing list of sites...\n");

    int n;
    while ( (n=bcf_sr_next_line(files)) )
    {
        bcf_sr_t *reader = NULL;
        bcf1_t *line = NULL;
        int i, ret = 0;
        for (i=0; i<files->nreaders; i++)
        {
            if ( !bcf_sr_has_line(files,i) ) continue;

            if ( args->nflt && args->flt[i] )
            {
                bcf1_t *rec = bcf_sr_get_line(files, i);
                int pass = filter_test(args->flt[i], rec, NULL);
                if ( args->flt_logic[i] & FLT_EXCLUDE ) pass = pass ? 0 : 1;
                if ( !pass )
                {
                    // A filtered record is treated as if the reader did not have the site
                    files->has_line[i] = 0;
                    n--;
                    continue;
                }
            }

            if ( !line )
            {
                line = files->readers[i].buffer[0];
                reader = &files->readers[i];
            }
            ret |= 1<<i;    // this may overflow for many files, but will be used only with two (OP_VENN)
        }

        switch (args->isec_op)
        {
            case OP_COMPLEMENT: if ( n!=1 || !bcf_sr_has_line(files,0) ) continue; break;
            case OP_EQUAL: if ( n != args->isec_n ) continue; break;
            case OP_PLUS: if ( n < args->isec_n ) continue; break;
            case OP_MINUS: if ( n > args->isec_n ) continue; break;
            case OP_EXACT:
                for (i=0; i<files->nreaders; i++)
                    if ( files->has_line[i] != args->isec_exact[i] ) break;
                if ( i<files->nreaders ) continue;
                break;
        }

        if ( out_std )
        {
            if ( bcf_sr_has_line(files,args->iwrite)
                 && bcf_write1(out_fh, files->readers[args->iwrite].header, files->readers[args->iwrite].buffer[0])!=0 )
                error("[%s] Error: cannot write to %s\n", __func__, out_name);
            continue;
        }
        else if ( args->fh_sites )
        {
            str.l = 0;
            kputs(reader->header->id[BCF_DT_CTG][line->rid].key, &str); kputc('\t', &str);
            kputw(line->pos+1, &str); kputc('\t', &str);
            if (line->n_allele > 0) kputs(line->d.allele[0], &str);
            else kputc('.', &str);
            kputc('\t', &str);
            if (line->n_allele > 1) kputs(line->d.allele[1], &str);
            else kputc('.', &str);
            for (i=2; i<line->n_allele; i++)
            {
                kputc(',', &str);
                kputs(line->d.allele[i], &str);
            }
            kputc('\t', &str);
            for (i=0; i<files->nreaders; i++)
                kputc(bcf_sr_has_line(files,i)?'1':'0', &str);
            kputc('\n', &str);
            if ( fwrite(str.s,sizeof(char),str.l,args->fh_sites)!=str.l )
                error("[%s] Error: failed to write %d bytes to the sites file\n", __func__, (int)str.l);
        }

        if ( args->prefix )
        {
            if ( args->isec_op==OP_VENN && ret==3 )
            {
                // Shared site: each reader's own record goes to its own "shared" file
                if ( !args->nwrite || args->write[0] )
                {
                    if ( bcf_write1(args->fh_out[2], bcf_sr_get_header(files,0), bcf_sr_get_line(files,0))!=0 )
                        error("[%s] Error: cannot write\n", __func__);
                }
                if ( !args->nwrite || args->write[1] )
                {
                    if ( bcf_write1(args->fh_out[3], bcf_sr_get_header(files,1), bcf_sr_get_line(files,1))!=0 )
                        error("[%s] Error: cannot write\n", __func__);
                }
            }
            else
            {
                for (i=0; i<files->nreaders; i++)
                {
                    if ( !bcf_sr_has_line(files,i) ) continue;
                    if ( args->write && !args->write[i] ) continue;
                    if ( bcf_write1(args->fh_out[i], files->readers[i].header, files->readers[i].buffer[0])!=0 )
                        error("[%s] Error: cannot write\n", __func__);
                }
            }
        }
    }
    if ( str.s ) free(str.s);
    // Buffered data is flushed on close, so a failure here also means lost output
    if ( out_fh && hts_close(out_fh)!=0 )
        error("[%s] Error: close failed .. %s\n", __func__, out_name);
}
/** * Gets the base in the read that is mapped to a genomic position. * Extracts the read sequence and aualities too. */ void bam_get_base_and_qual_and_read_and_qual(bam1_t *srec, uint32_t pos, char& base, char& qual, int32_t& rpos, kstring_t* readseq, kstring_t* readqual) { bam1_core_t *c = &srec->core; int32_t rlen = c->l_qseq; uint32_t cpos = c->pos; //reference coordinates of the first mapped base rpos = 0; //read coordinates kstring_t str; str.l = str.m = 0, str.s = 0; base = 'N'; qual = 0; if (c->n_cigar) { uint32_t *cigar = bam_get_cigar(srec); for (uint32_t i = 0; i < c->n_cigar; ++i) { char op = bam_cigar_opchr(cigar[i]); str.l = 0; kputw(bam_cigar_oplen(cigar[i]), &str); char* stop; uint32_t len = strtol(str.s, &stop, 10); assert(stop); if (op=='M') { if (pos>=cpos && pos<=cpos+len-1) { rpos += pos-cpos; break; } cpos += len; rpos += len; } else if (op=='D') { if (pos>=cpos && pos<=cpos+len-1) { rpos = -1; break; } cpos += len; } else if (op=='S' || op=='I') { rpos += len; } } //std::cout << "bpos " << bpos << "\n"; if (rpos>=0 && rpos<=rlen) { //sequence bam_get_seq_string(srec, readseq); base = readseq->s[rpos]; //qual bam_get_qual_string(srec, readqual); qual = readqual->s[rpos]; } else { rpos = BAM_READ_INDEX_NA; } } // std::cout << "b: " << base << "\n"; // std::cout << "q: " << s[bpos-1] << " " << q << "\n"; // for (uint32_t i = 0; i < c->l_qseq; ++i) std::cerr << ((char)(s[i] + 33)); };