Beispiel #1
0
/*
 * Builds the template CIGAR of a read pair in `str` and appends it to the
 * leftmost read as a "CT" aux tag of type 'Z'.
 *
 * Layout: <seg><strand><CIGAR of leftmost read><gap>T<seg><strand><CIGAR of other read>
 *   <seg>    '1' when BAM_FREAD1 is set, otherwise '2'
 *   <strand> 'R' when BAM_FREVERSE is set, otherwise 'F'
 *   <gap>    start of the other read minus the end of the leftmost read
 *            (as returned by bam_calend)
 *
 * `str` is always reset. No tag is written when the reads have no reference
 * or lie on different references.
 */
void bam_template_cigar(bam1_t *b1, bam1_t *b2, kstring_t *str)
{
	bam1_t *left, *right;
	uint32_t *ops;
	int k, left_end;

	str->l = 0;
	if (b1->core.tid < 0 || b1->core.tid != b2->core.tid) return;

	/* order the pair by start coordinate; on a tie b1 stays first */
	if (b1->core.pos > b2->core.pos) { left = b2; right = b1; }
	else { left = b1; right = b2; }

	/* leftmost read: segment index, strand, CIGAR */
	kputc((left->core.flag & BAM_FREAD1)? '1' : '2', str);
	kputc((left->core.flag & BAM_FREVERSE)? 'R' : 'F', str);
	ops = bam1_cigar(left);
	for (k = 0; k < left->core.n_cigar; ++k) {
		kputw(bam_cigar_oplen(ops[k]), str);
		kputc(bam_cigar_opchr(ops[k]), str);
	}

	/* distance between the two reads, followed by the 'T' separator */
	left_end = bam_calend(&left->core, ops);
	kputw(right->core.pos - left_end, str);
	kputc('T', str);

	/* other read: segment index, strand, CIGAR */
	kputc((right->core.flag & BAM_FREAD1)? '1' : '2', str);
	kputc((right->core.flag & BAM_FREVERSE)? 'R' : 'F', str);
	ops = bam1_cigar(right);
	for (k = 0; k < right->core.n_cigar; ++k) {
		kputw(bam_cigar_oplen(ops[k]), str);
		kputc(bam_cigar_opchr(ops[k]), str);
	}

	/* the +1 stores the terminating NUL as part of the 'Z' value */
	bam_aux_append(left, "CT", 'Z', str->l+1, (uint8_t*)str->s);
}
Beispiel #2
0
/*
 * Computes the "ct" (template CIGAR) tag for two BAM records that are assumed
 * to come from the same template, and stores it on the record with the
 * smaller position.
 *
 * Layout: <seg><strand><CIGAR of leftmost read><gap>T<seg><strand><CIGAR of other read>
 * where <seg> is '1'/'2' (BAM_FREAD1 set / not set), <strand> is 'R'/'F'
 * (BAM_FREVERSE set / not set) and <gap> is the other read's position minus
 * bam_endpos() of the leftmost read.
 *
 * `str` is always reset. Nothing else happens unless both records are mapped,
 * have non-negative positions and share a non-negative reference id.
 * Any existing "ct" tag is removed from both records before the new one is added.
 */
static void bam_template_cigar(bam1_t *b1, bam1_t *b2, kstring_t *str)
{
    bam1_t *mate[2];
    int seg, k, end;

    str->l = 0;

    // coordinateless, unmapped, or not on the same chr; skip
    if (b1->core.tid < 0 || b1->core.tid != b2->core.tid) return;
    if (b1->core.pos < 0 || b2->core.pos < 0) return;
    if ((b1->core.flag | b2->core.flag) & BAM_FUNMAP) return;

    // mate[0] is the record with the smaller coordinate (b1 on a tie)
    if (b1->core.pos > b2->core.pos) { mate[0] = b2; mate[1] = b1; }
    else { mate[0] = b1; mate[1] = b2; }

    end = bam_endpos(mate[0]);
    for (seg = 0; seg < 2; ++seg) {
        bam1_t *rec = mate[seg];
        uint32_t *ops = bam_get_cigar(rec);

        if (seg == 1) {
            // gap between the two reads, then the 'T' separator
            kputw(rec->core.pos - end, str);
            kputc('T', str);
        }
        kputc((rec->core.flag & BAM_FREAD1)? '1' : '2', str); // segment index
        kputc((rec->core.flag & BAM_FREVERSE)? 'R' : 'F', str); // strand
        for (k = 0; k < rec->core.n_cigar; ++k) {
            kputw(bam_cigar_oplen(ops[k]), str);
            kputc(bam_cigar_opchr(ops[k]), str);
        }
    }

    // remove any previous tag from both records before writing the new one
    for (seg = 0; seg < 2; ++seg) {
        uint8_t *old_tag = bam_aux_get(mate[seg], "ct");
        if (old_tag != NULL) bam_aux_del(mate[seg], old_tag);
    }

    bam_aux_append(mate[0], "ct", 'Z', str->l+1, (uint8_t*)str->s);
}
Beispiel #3
0
/**
 * Writes a string representation of this Genome Interval into interval.
 *
 * The result is the sequence name alone when the interval is exactly
 * [1, (1<<29)-1]; otherwise it is "seq:start1-end1". Any previous content
 * of interval is discarded.
 */
void GenomeInterval::to_string(kstring_t *interval)
{
    interval->l = 0;
    kputs(seq.c_str(), interval);

    // the span 1..(1<<29)-1 is left implicit
    const bool is_full_span = (start1 == 1) && (end1 == ((1<<29)-1));
    if (is_full_span) return;

    kputc(':', interval);
    kputw(start1, interval);
    kputc('-', interval);
    kputw(end1, interval);
}
Beispiel #4
0
// Formats one tab-separated line per retained site into a newly allocated
// kstring_t and stores that buffer in pars->extras[index].
//
// Columns: chromosome name, 1-based position, major, minor, then ref and anc
// when those arrays are present, the frequency estimates selected by the
// doMaf bit mask, the matching SNP statistics when doSNP is set, and finally
// the keepSites value of the site.
void frequency::prepPrint(funkyPars *pars){
  kstring_t *out = new kstring_t;
  out->l = 0;
  out->m = 0;
  out->s = NULL;

  for(int site=0; site<pars->numSites; ++site) {
    // sites with keepSites==0 produce no line
    if(pars->keepSites[site]==0)
      continue;

    // chr, pos, major, minor
    kputs(header->name[pars->refId],out);
    kputc('\t',out);
    kputw(pars->posi[site]+1,out);
    kputc('\t',out);
    kputc(intToRef[pars->major[site]],out);
    kputc('\t',out);
    kputc(intToRef[pars->minor[site]],out);
    kputc('\t',out);

    // ref and anc, only if they exist
    if(pars->ref!=NULL){
      kputc(intToRef[pars->ref[site]],out);
      kputc('\t',out);
    }
    if(pars->anc!=NULL){
      kputc(intToRef[pars->anc[site]],out);
      kputc('\t',out);
    }

    // one frequency column per bit set in doMaf
    if(doMaf &1) { ksprintf(out,"%f\t",pars->results->freq->pml[site]); }
    if(doMaf &2) { ksprintf(out,"%f\t",pars->results->freq->pEM[site]); }
    if(doMaf &4) { ksprintf(out,"%f\t",pars->results->freq->pmlun[site]); }
    if(doMaf &8) { ksprintf(out,"%f\t",pars->results->freq->pEMun[site]); }
    if(doMaf &16) { ksprintf(out,"%f\t",pars->results->asso->freq[site]); }
    if(doMaf &32) { ksprintf(out,"%f\t",pars->phat[site]); }

    // SNP statistics for the first four estimators
    if(doSNP){
      if(doMaf &1) { ksprintf(out,"%f\t",pars->results->freq->pmlSNP[site]); }
      if(doMaf &2) { ksprintf(out,"%f\t",pars->results->freq->pEMSNP[site]); }
      if(doMaf &4) { ksprintf(out,"%f\t",pars->results->freq->pmlunSNP[site]); }
      if(doMaf &8) { ksprintf(out,"%f\t",pars->results->freq->pEMunSNP[site]); }
    }

    kputw(pars->keepSites[site],out);
    kputc('\n',out);
  }

  // hand the buffer over; it is not freed here
  pars->extras[index] = out;
}
Beispiel #5
0
Datei: bed.cpp Projekt: atks/vt
/**
 * String version of BED record.
 */
std::string BEDRecord::to_string()
{
    kstring_t s = {0,0,0}; 

    kputs(this->chrom.c_str(), &s);
    kputc(':', &s);
    kputw(this->beg1, &s);
    kputc('-', &s);
    kputw(this->end1, &s);

    std::string str(s.s);
    if (s.m) free(s.s);
    return str;
};
Beispiel #6
0
/**
 * Returns a string representation of this Genome Interval.
 */
std::string GenomeInterval::to_string()
{
    kstring_t s = {0,0,0};
    kputs(seq.c_str(), &s);
    if (start1!=1 || end1!=((1<<29)-1))
    {
        kputc(':', &s);
        kputw(start1, &s);
        kputc('-', &s);
        kputw(end1, &s);
    }
    std::string interval(s.s);
    if (s.m) free(s.s);
    return interval;
};
Beispiel #7
0
/**
 * Gets a sorted string representation of a variant.
 *
 * Writes "<chrom>:<pos1>:<allele 0>,<remaining alleles>" into var, replacing
 * its previous content. With exactly two alleles they are written as they
 * are; otherwise the alleles after the first are sorted with cmpstr before
 * being written, so the order in which they appear in the record does not
 * affect the result.
 *
 * @param h   header used to resolve the chromosome name
 * @param v   record to format; it is unpacked up to BCF_UN_STR
 * @param var output string
 */
void bcf_variant2string_sorted(bcf_hdr_t *h, bcf1_t *v, kstring_t *var)
{
    // NOTE(review): this prints the record on every call and looks like
    // left-over debugging output - confirm before removing.
    bcf_print_liten(h,v);

    bcf_unpack(v, BCF_UN_STR);
    var->l = 0;
    kputs(bcf_get_chrom(h, v), var);
    kputc(':', var);
    kputw(bcf_get_pos1(v), var);
    kputc(':', var);

    if (v->n_allele==2)
    {
        // a single alternate allele needs no sorting
        kputs(bcf_get_alt(v, 0), var);
        kputc(',', var);
        kputs(bcf_get_alt(v, 1), var);
    }
    else
    {
        char** allele = bcf_get_allele(v);
        const int32_t n_alt = bcf_get_n_allele(v) - 1;

        kputs(bcf_get_alt(v, 0), var);

        if (n_alt > 0)
        {
            // Scratch copy of the pointers to alleles 1..n_alt. It is filled
            // from index 0 and sorted with exactly n_alt elements, so the
            // sort never touches memory outside the allocation.
            char** temp = (char**) malloc(n_alt*sizeof(char*));
            for (int32_t i=0; i<n_alt; ++i)
            {
                temp[i] = allele[i+1];
            }
            std::qsort(temp, n_alt, sizeof(char*), cmpstr);
            for (int32_t i=0; i<n_alt; ++i)
            {
                kputc(',', var);
                kputs(temp[i], var);
            }
            free(temp);
        }
    }
}
Beispiel #8
0
/*
 * Prints n unitigs to stdout, four lines per unitig:
 *   @<i<<1|0>:<i<<1|1> TAB nsr TAB <overlaps, first side> TAB <overlaps, second side>
 *   seq
 *   +
 *   cov
 * Each overlap is written as "<id<<1|to>,<len>;". A side without overlaps is
 * written as ".". The first n_ovlp[0] entries of ovlp belong to the first
 * side and the following n_ovlp[1] entries to the second.
 */
void fml_utg_print(int n, const fml_utg_t *utg)
{
	kstring_t out = {0,0,0};
	int i;
	for (i = 0; i < n; ++i) {
		const fml_utg_t *u = &utg[i];
		int side, j = 0;

		/* header: the two end ids and the nsr field */
		out.l = 0;
		kputc('@', &out);
		kputw(i<<1|0, &out);
		kputc(':', &out);
		kputw(i<<1|1, &out);
		kputc('\t', &out);
		kputw(u->nsr, &out);
		kputc('\t', &out);

		/* overlap lists; j keeps running across both sides */
		for (side = 0; side < 2; ++side) {
			for (; j < (side == 0? u->n_ovlp[0] : u->n_ovlp[0] + u->n_ovlp[1]); ++j) {
				kputw(u->ovlp[j].id<<1|u->ovlp[j].to, &out);
				kputc(',', &out);
				kputw(u->ovlp[j].len, &out);
				kputc(';', &out);
			}
			if (u->n_ovlp[side] == 0) kputc('.', &out);
			kputc(side == 0? '\t' : '\n', &out);
		}

		/* sequence, separator line and coverage string */
		kputsn(u->seq, u->len, &out);
		kputsn("\n+\n", 3, &out);
		kputsn(u->cov, u->len, &out);
		kputc('\n', &out);
		fputs(out.s, stdout);
	}
	free(out.s);
}
Beispiel #9
0
/*
 * "bgt getalt" sub-command.
 *
 * Opens "<bgt-base>.bcf", reads every record and prints one line per record
 * to stdout in the form
 *     <contig>:<pos>:<rlen>:<alt>
 * after stripping the prefix shared by REF and ALT: <pos> is b->pos + 1 plus
 * the prefix length, <rlen> is b->rlen minus the prefix length, and <alt> is
 * the ALT string without the prefix.
 *
 * Returns 0 on success, 1 on a usage error or when the input cannot be
 * opened or its header cannot be read.
 */
int main_getalt(int argc, char *argv[])
{
	int c;
	char *fn;
	BGZF *fp;
	bcf1_t *b;
	bcf_hdr_t *h;
	kstring_t s = {0,0,0};

	/* no options are defined; getopt only skips past anything option-like */
	while ((c = getopt(argc, argv, "")) >= 0) {
	}
	if (argc - optind == 0) {
		fprintf(stderr, "Usage: bgt getalt <bgt-base>\n");
		return 1;
	}

	/* +5 leaves room for ".bcf" and the terminating NUL */
	fn = (char*)calloc(strlen(argv[optind]) + 5, 1);
	if (fn == 0) {
		fprintf(stderr, "[E::main_getalt] out of memory\n");
		return 1;
	}
	sprintf(fn, "%s.bcf", argv[optind]);
	fp = bgzf_open(fn, "r");
	if (fp == 0) {
		/* reported explicitly so the failure is also caught when assertions are compiled out */
		fprintf(stderr, "[E::main_getalt] failed to open '%s'\n", fn);
		free(fn);
		return 1;
	}
	free(fn);

	h = bcf_hdr_read(fp);
	if (h == 0) {
		fprintf(stderr, "[E::main_getalt] failed to read the BCF header\n");
		bgzf_close(fp);
		return 1;
	}
	b = bcf_init1();
	while (bcf_read1(fp, b) >= 0) {
		char *ref, *alt;
		int l_ref, l_alt, i, min_l;
		bcf_get_ref_alt1(b, &l_ref, &ref, &l_alt, &alt);
		/* i = length of the common prefix of REF and ALT */
		min_l = l_ref < l_alt? l_ref : l_alt;
		for (i = 0; i < min_l && ref[i] == alt[i]; ++i);
		s.l = 0;
		kputs(h->id[BCF_DT_CTG][b->rid].key, &s);
		kputc(':', &s); kputw(b->pos + 1 + i, &s);
		kputc(':', &s); kputw(b->rlen - i, &s);
		kputc(':', &s); kputsn(alt + i, l_alt - i, &s);
		puts(s.s);
	}
	bcf_destroy1(b);
	bcf_hdr_destroy(h);

	bgzf_close(fp);
	free(s.s);
	return 0;
}
Beispiel #10
0
/*
 * Appends a textual rendering of a typed array of values to s.
 *
 * s    - output string; text is appended and existing content is kept
 * n    - number of elements in data; n == 0 appends a single '.'
 * type - BCF_BT_CHAR, BCF_BT_INT8, BCF_BT_INT16, BCF_BT_INT32 or BCF_BT_FLOAT;
 *        any other value prints a "todo" message to stderr and calls exit(1)
 * data - pointer to the first element, interpreted according to type
 */
void bcf_fmt_array(kstring_t *s, int n, int type, void *data)
{
	int j = 0;
	if (n == 0) {
		kputc('.', s);
		return;
	}
    if (type == BCF_BT_CHAR) 
    {
        // character data: copy up to n bytes, stopping early at a NUL byte
        char *p = (char*)data;
        for (j = 0; j < n && *p; ++j, ++p) kputc(*p, s);
    }
    else
    {
        // BRANCH(type_t, is_missing, kprint) formats n values of type_t:
        //  - the first loop leaves in j the number of leading values that are
        //    not missing;
        //  - when the very first value is missing (j == 0) a single '.' is
        //    written for the whole array;
        //  - otherwise all n values are written comma-separated, with '.'
        //    standing in for each missing value.
        // The "if (n && j == 0)" check after the inner loop appears to be
        // unreachable: that loop only runs when n > 0 and ends with j == n.
        #define BRANCH(type_t, is_missing, kprint) {\
            type_t *p = (type_t *) data; \
            for (j=0; j<n && !(is_missing); j++) p++; \
            if ( j ) \
            { \
                p = (type_t *) data; \
                for (j=0; j<n; j++, p++) \
                { \
                    if ( j ) kputc(',', s); \
                    if ( is_missing ) kputc('.', s); \
                    else kprint; \
                } \
                if (n && j == 0) kputc('.', s); \
            } \
            else kputc('.', s); \
        }
        // Integers use the type's minimum value as the missing marker; floats
        // are compared by bit pattern against bcf_missing_float.
        switch (type) {
            case BCF_BT_INT8:  BRANCH(int8_t,  *p==INT8_MIN,  kputw(*p, s)); break;
            case BCF_BT_INT16: BRANCH(int16_t, *p==INT16_MIN, kputw(*p, s)); break;
            case BCF_BT_INT32: BRANCH(int32_t, *p==INT32_MIN, kputw(*p, s)); break;
            case BCF_BT_FLOAT: BRANCH(float,  *(uint32_t*)p==bcf_missing_float, ksprintf(s, "%g", *p)); break;
            default: fprintf(stderr,"todo: type %d\n", type); exit(1); break;
        }
        #undef BRANCH
    }
}
Beispiel #11
0
// Appends every aux field stored in [s, sStop) to str as "\tXX:<type>:<value>".
//
// Each field starts with a two-character tag and a one-byte type code:
//   A            single character, written as "A:<char>"
//   c C s S i I  integers of 1, 1, 2, 2, 4 and 4 bytes, written as "i:<number>"
//   f, d         float / double, written with %g / %lg
//   Z, H         NUL-terminated string, written as "<type>:<text>"
//   B            array: sub-type byte, 4-byte element count, then the elements,
//                written as "B:<sub-type>,<v1>,<v2>..."
// For any other type code only the tag and ':' are written and the cursor
// moves past the three header bytes only.
void printAuxBuffered(uint8_t *s, uint8_t *sStop,kstring_t &str ) {
  while (s < sStop) {
    // tag name
    kputc('\t', &str);
    kputc(s[0], &str);
    kputc(s[1], &str);
    kputc(':', &str);

    const uint8_t type = s[2];
    s += 3; // s now points at the first value byte

    switch (type) {
    case 'A':
      kputsn("A:", 2, &str); kputc(*s, &str); s += 1;
      break;
    case 'C':
      kputsn("i:", 2, &str); kputw(*s, &str); s += 1;
      break;
    case 'c':
      kputsn("i:", 2, &str); kputw(*(int8_t*)s, &str); s += 1;
      break;
    case 'S':
      kputsn("i:", 2, &str); kputw(*(uint16_t*)s, &str); s += 2;
      break;
    case 's':
      kputsn("i:", 2, &str); kputw(*(int16_t*)s, &str); s += 2;
      break;
    case 'I':
      kputsn("i:", 2, &str); kputuw(*(uint32_t*)s, &str); s += 4;
      break;
    case 'i':
      kputsn("i:", 2, &str); kputw(*(int32_t*)s, &str); s += 4;
      break;
    case 'f':
      ksprintf(&str, "f:%g", *(float*)s); s += 4;
      break;
    case 'd':
      ksprintf(&str, "d:%lg", *(double*)s); s += 8;
      break;
    case 'Z':
    case 'H':
      kputc(type, &str); kputc(':', &str);
      while (*s) kputc(*s++, &str);
      ++s; // step over the terminating NUL
      break;
    case 'B': {
      const uint8_t sub_type = *(s++);
      int32_t n;
      memcpy(&n, s, 4);
      s += 4; // now at the start of the array
      kputc(type, &str); kputc(':', &str); kputc(sub_type, &str); // write the typing
      for (int i = 0; i < n; ++i) {
        kputc(',', &str);
        switch (sub_type) {
        case 'c': kputw(*(int8_t*)s, &str); s += 1; break;
        case 'C': kputw(*(uint8_t*)s, &str); s += 1; break;
        case 's': kputw(*(int16_t*)s, &str); s += 2; break;
        case 'S': kputw(*(uint16_t*)s, &str); s += 2; break;
        case 'i': kputw(*(int32_t*)s, &str); s += 4; break;
        case 'I': kputuw(*(uint32_t*)s, &str); s += 4; break;
        case 'f': ksprintf(&str, "%g", *(float*)s); s += 4; break;
        default: break; // other sub-types: only the comma is written
        }
      }
      break;
    }
    default:
      break;
    }
  }
}
Beispiel #12
0
/**
 * Writes a string representation of the underlying VNTR, based on its fuzzy
 * alignment fields, into s.
 *
 * Format: "<chrom>:<fuzzy_beg1>:<fuzzy_repeat_tract>:<VNTR>:<motif>", where
 * "<VNTR>" is written literally. Any previous content of s is discarded.
 */
void Variant::get_fuzzy_vntr_string(kstring_t* s)
{
    s->l = 0;

    // locus
    kputs(chrom.c_str(), s);
    kputc(':', s);
    kputw(vntr.fuzzy_beg1, s);
    kputc(':', s);

    // repeat tract, the symbolic allele and the motif
    kputs(vntr.fuzzy_repeat_tract.c_str(), s);
    kputs(":<VNTR>:", s);
    kputs(vntr.motif.c_str(), s);
}
Beispiel #13
0
/**
 * Writes a string representation of a variant into var.
 *
 * Format: "<chrom>:<pos1>:<allele 0>,<allele 1>,...", with the alleles in
 * record order. Any previous content of var is discarded.
 */
void bcf_variant2string(bcf_hdr_t *h, bcf1_t *v, kstring_t *var)
{
    bcf_unpack(v, BCF_UN_STR);

    var->l = 0;
    kputs(bcf_get_chrom(h, v), var);
    kputc(':', var);
    kputw(bcf_get_pos1(v), var);
    kputc(':', var);

    // a comma follows every allele except the last
    int32_t idx = 0;
    while (idx < v->n_allele)
    {
        kputs(bcf_get_alt(v, idx), var);
        ++idx;
        if (idx < v->n_allele) kputc(',', var);
    }
}
Beispiel #14
0
/**
 * Writes the CIGAR of a BAM record into cigar_string as text
 * (<length><operation> for every operation).
 *
 * The output length is reset first; for a record without CIGAR operations
 * nothing further is written.
 */
void bam_get_cigar_string(bam1_t *s, kstring_t *cigar_string)
{
    cigar_string->l=0;

    int32_t remaining = bam_get_n_cigar_op(s);
    if (remaining == 0) return;

    // walk the packed operations, counting down the number left to print
    for (const uint32_t *op = bam_get_cigar(s); remaining > 0; --remaining, ++op)
    {
        kputw(bam_cigar_oplen(*op), cigar_string);
        kputc(bam_cigar_opchr(*op), cigar_string);
    }
}
Beispiel #15
0
// Appends the CIGAR of b to str as text; a record without CIGAR operations
// is written as "*" rather than as an empty string.
// Returns 0 on success, -1 as soon as any append reports EOF.
static int bam_format_cigar(const bam1_t* b, kstring_t* str)
{
    const uint32_t n_ops = b->core.n_cigar;

    if (n_ops > 0) {
        const uint32_t *op = bam_get_cigar(b);
        const uint32_t *const stop = op + n_ops;

        for (; op < stop; ++op) {
            // length first, then the operation character
            if (kputw(bam_cigar_oplen(*op), str) == EOF ||
                kputc(bam_cigar_opchr(*op), str) == EOF) {
                return -1;
            }
        }
        return 0;
    }

    if (kputc('*', str) == EOF) return -1;
    return 0;
}
Beispiel #16
0
/**
 * Gets a string representation of the variant.
 */
std::string Variant::get_variant_string()
{
    kstring_t var = {0,0,0};
    bcf_unpack(v, BCF_UN_STR);
    var.l = 0;
    kputs(bcf_get_chrom(h, v), &var);
    kputc(':', &var);
    kputw(bcf_get_pos1(v), &var);
    kputc(':', &var);
    for (size_t i=0; i<bcf_get_n_allele(v); ++i)
    {
        if (i) kputc('/', &var);
        kputs(bcf_get_alt(v, i), &var);
    }

    std::string str(var.s);

    if (var.m) free(var.s);

    return str;
}
Beispiel #17
0
/*
 * Formats every alignment region in a as one tab-separated line and stores
 * the concatenated text in s->sam (left as the initial null pointer when a is
 * empty).
 *
 * Columns: read name, read length, query begin, query end, reference name,
 * reference length, reference begin, reference end, and
 * truesc / opt->a / max(query span, reference span) printed with three
 * decimals. For reverse-strand hits the query begin and end are swapped.
 */
void mem_reg2ovlp(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a)
{
	kstring_t out = {0,0,0};
	int k;
	for (k = 0; k < a->n; ++k) {
		const mem_alnreg_t *reg = &a->a[k];
		int is_rev, rid, qb = reg->qb, qe = reg->qe;
		int64_t pos, rb = reg->rb, re = reg->re;

		/* locate the hit on the forward reference and make pos contig-relative */
		pos = bns_depos(bns, rb < bns->l_pac? rb : re - 1, &is_rev);
		rid = bns_pos2rid(bns, pos);
		assert(rid == reg->rid);
		pos -= bns->anns[rid].offset;

		/* query columns */
		kputs(s->name, &out);
		kputc('\t', &out);
		kputw(s->l_seq, &out);
		kputc('\t', &out);
		if (is_rev) {
			int tmp = qb;
			qb = qe;
			qe = tmp;
		}
		kputw(qb, &out);
		kputc('\t', &out);
		kputw(qe, &out);
		kputc('\t', &out);

		/* reference columns */
		kputs(bns->anns[rid].name, &out);
		kputc('\t', &out);
		kputw(bns->anns[rid].len, &out);
		kputc('\t', &out);
		kputw(pos, &out);
		kputc('\t', &out);
		kputw(pos + (re - rb), &out);
		kputc('\t', &out);

		/* score normalised by the longer of the two spans */
		ksprintf(&out, "%.3f", (double)reg->truesc / opt->a / (qe - qb > re - rb? qe - qb : re - rb));
		kputc('\n', &out);
	}
	s->sam = out.s;
}
Beispiel #18
0
/*
 * Formats one BAM record as a line of SAM text (without a trailing newline).
 *
 * header - supplies the reference names; when null the numeric tid / mtid
 *          are printed instead
 * b      - record to format
 * of     - flag output format: BAM_OFDEC prints the flag in decimal,
 *          BAM_OFHEX in hexadecimal, any other value prints one character
 *          from bam_flag2char_table per flag bit that is set
 *
 * Returns the buffer built by the kstring routines (presumably to be freed
 * by the caller - confirm against the call sites).
 */
char *bam_format1_core(const bam_header_t *header, const bam1_t *b, int of)
{
	/* Indexed by the 4-bit CIGAR operation code. Codes without an assigned
	 * letter map to '?', so no code can yield a NUL byte (which would cut the
	 * returned C string short) or index past the end of the table. */
	static const char cigar_op_chars[] = "MIDNSHP=XB??????";
	uint8_t *s = bam1_seq(b), *t = bam1_qual(b);
	int i;
	const bam1_core_t *c = &b->core;
	kstring_t str;
	str.l = str.m = 0; str.s = 0;

	/* QNAME and FLAG */
	kputsn(bam1_qname(b), c->l_qname-1, &str); kputc('\t', &str);
	if (of == BAM_OFDEC) { kputw(c->flag, &str); kputc('\t', &str); }
	else if (of == BAM_OFHEX) ksprintf(&str, "0x%x\t", c->flag);
	else { // BAM_OFSTR
		for (i = 0; i < 16; ++i)
			if ((c->flag & 1<<i) && bam_flag2char_table[i])
				kputc(bam_flag2char_table[i], &str);
		kputc('\t', &str);
	}
	/* RNAME: '*' for records without a reference */
	if (c->tid < 0) kputsn("*\t", 2, &str);
	else {
		if (header) kputs(header->target_name[c->tid] , &str);
		else kputw(c->tid, &str);
		kputc('\t', &str);
	}
	/* POS (stored 0-based, printed 1-based) and MAPQ */
	kputw(c->pos + 1, &str); kputc('\t', &str); kputw(c->qual, &str); kputc('\t', &str);
	/* CIGAR: '*' when the record has no operations */
	if (c->n_cigar == 0) kputc('*', &str);
	else {
		for (i = 0; i < c->n_cigar; ++i) {
			kputw(bam1_cigar(b)[i]>>BAM_CIGAR_SHIFT, &str);
			kputc(cigar_op_chars[(bam1_cigar(b)[i]&BAM_CIGAR_MASK) & 0xf], &str);
		}
	}
	kputc('\t', &str);
	/* mate reference: '*' when absent, '=' when identical to this record's reference */
	if (c->mtid < 0) kputsn("*\t", 2, &str);
	else if (c->mtid == c->tid) kputsn("=\t", 2, &str);
	else {
		if (header) kputs(header->target_name[c->mtid], &str);
		else kputw(c->mtid, &str);
		kputc('\t', &str);
	}
	/* mate position (printed 1-based) and insert size */
	kputw(c->mpos + 1, &str); kputc('\t', &str); kputw(c->isize, &str); kputc('\t', &str);
	/* SEQ and QUAL; a first quality byte of 0xff means the qualities are absent */
	if (c->l_qseq) {
		for (i = 0; i < c->l_qseq; ++i) kputc(bam_nt16_rev_table[bam1_seqi(s, i)], &str);
		kputc('\t', &str);
		if (t[0] == 0xff) kputc('*', &str);
		else for (i = 0; i < c->l_qseq; ++i) kputc(t[i] + 33, &str);
	} else kputsn("*\t*", 3, &str);
	/* aux fields: two-byte tag, one-byte type code, then the value */
	s = bam1_aux(b);
	while (s < b->data + b->data_len) {
		uint8_t type, key[2];
		key[0] = s[0]; key[1] = s[1];
		s += 2; type = *s; ++s;
		kputc('\t', &str); kputsn((char*)key, 2, &str); kputc(':', &str);
		/* NOTE(review): a type code that is not listed below writes nothing
		 * more and leaves s just past the three header bytes. */
		if (type == 'A') { kputsn("A:", 2, &str); kputc(*s, &str); ++s; }
		else if (type == 'C') { kputsn("i:", 2, &str); kputw(*s, &str); ++s; }
		else if (type == 'c') { kputsn("i:", 2, &str); kputw(*(int8_t*)s, &str); ++s; }
		else if (type == 'S') { kputsn("i:", 2, &str); kputw(*(uint16_t*)s, &str); s += 2; }
		else if (type == 's') { kputsn("i:", 2, &str); kputw(*(int16_t*)s, &str); s += 2; }
		else if (type == 'I') { kputsn("i:", 2, &str); kputuw(*(uint32_t*)s, &str); s += 4; }
		else if (type == 'i') { kputsn("i:", 2, &str); kputw(*(int32_t*)s, &str); s += 4; }
		else if (type == 'f') { ksprintf(&str, "f:%g", *(float*)s); s += 4; }
		else if (type == 'd') { ksprintf(&str, "d:%lg", *(double*)s); s += 8; }
		else if (type == 'Z' || type == 'H') { kputc(type, &str); kputc(':', &str); while (*s) kputc(*s++, &str); ++s; }
	}
	return str.s;
}
Beispiel #19
0
/*
 * Reads the header of a VCF/BCF file.
 *
 * For binary input (fp->is_bin set) the work is delegated to bcf_hdr_read().
 * For text input the leading '#' lines are collected up to and including the
 * first line that starts with a single '#', then parsed into a new header.
 * When fp->fn_aux is set, one "##contig=<ID=...,length=...>" line per entry
 * of that file is inserted just before that final '#' line. Afterwards a
 * tabix index is loaded for fp->fn; every sequence name in the index that the
 * header lacks is added as a contig record with length -1.
 *
 * Returns the new header, or 0 when a line that does not start with '#' is
 * met before the single-'#' line.
 */
bcf_hdr_t *vcf_hdr_read(htsFile *fp)
{
	if (!fp->is_bin) {
		kstring_t txt, *s = &fp->line;
		bcf_hdr_t *h;
		h = bcf_hdr_init();
		txt.l = txt.m = 0; txt.s = 0;
		while (hts_getline(fp, KS_SEP_LINE, s) >= 0) {
			if (s->l == 0) continue;
			// a data line before the sample line: the header is incomplete
			if (s->s[0] != '#') {
				if (hts_verbose >= 2)
					fprintf(stderr, "[E::%s] no sample line\n", __func__);
				free(txt.s);
				bcf_hdr_destroy(h);
				return 0;
			}
			if (s->s[1] != '#' && fp->fn_aux) { // insert contigs here
				int dret;
				gzFile f;
				kstream_t *ks;
				kstring_t tmp;
				tmp.l = tmp.m = 0; tmp.s = 0;
				// NOTE(review): the result of gzopen() is not checked before use
				f = gzopen(fp->fn_aux, "r");
				ks = ks_init(f);
				// first field of each line is the contig name, second its length
				while (ks_getuntil(ks, 0, &tmp, &dret) >= 0) {
					int c;
					kputs("##contig=<ID=", &txt); kputs(tmp.s, &txt);
					ks_getuntil(ks, 0, &tmp, &dret);
					kputs(",length=", &txt); kputw(atol(tmp.s), &txt);
					kputsn(">\n", 2, &txt);
					if (dret != '\n')
						while ((c = ks_getc(ks)) != '\n' && c != -1); // skip the rest of the line
				}
				free(tmp.s);
				ks_destroy(ks);
				gzclose(f);
			}
			kputsn(s->s, s->l, &txt);
			// the single-'#' line ends the header and gets no trailing newline
			if (s->s[1] != '#') break;
			kputc('\n', &txt);
		}
		// the header takes over the text buffer
		h->l_text = txt.l + 1; // including NULL
		h->text = txt.s;
		bcf_hdr_parse(h);
        // check tabix index, are all contigs listed in the header? add the missing ones
        tbx_t *idx = tbx_index_load(fp->fn);
        if ( idx )
        {
			int i, n, need_sync = 0;
			const char **names = tbx_seqnames(idx, &n);
			for (i=0; i<n; i++)
			{
                bcf_hrec_t *hrec = bcf_hdr_get_hrec(h, BCF_DT_CTG, (char*) names[i]);
                if ( hrec ) continue;
                // build a "contig" record with ID=<name> and length=-1
                hrec = (bcf_hrec_t*) calloc(1,sizeof(bcf_hrec_t));
                hrec->key = strdup("contig");
                bcf_hrec_add_key(hrec, "ID", strlen("ID"));
                bcf_hrec_set_val(hrec, hrec->nkeys-1, (char*) names[i], strlen(names[i]), 0);
                bcf_hrec_add_key(hrec, "length", strlen("length"));
                bcf_hrec_set_val(hrec, hrec->nkeys-1, "-1", strlen("-1"), 0);   // what is a good default value?
                bcf_hdr_add_hrec(h, hrec);
                need_sync = 1;
			}
			free(names);
			tbx_destroy(idx);
            // re-synchronise and regenerate the header text only if something was added
            if ( need_sync )
            {
                bcf_hdr_sync(h);
                bcf_hdr_fmt_text(h);
            }
		}
		return h;
	} else return bcf_hdr_read((BGZF*)fp->fp);
}
Beispiel #20
0
/*
 * Recomputes the MD string and the NM count of an alignment against ref and
 * applies the modifications selected by flag.
 *
 * b       - record to update in place
 * ref     - reference sequence, indexed by reference coordinate
 * ref_len - number of usable characters in ref; a '\0' in ref also ends it
 * flag    - bit mask of USE_EQUAL, UPDATE_NM, UPDATE_MD, DROP_TAG and BIN_QUAL
 * max_nm  - when > 0 and the computed NM is at least max_nm, matching bases
 *           are overwritten (see the second pass below)
 */
void bam_fillmd1_core(bam1_t *b, char *ref, int ref_len, int flag, int max_nm)
{
    uint8_t *seq = bam_get_seq(b);
    uint32_t *cigar = bam_get_cigar(b);
    bam1_core_t *c = &b->core;
    int i, x, y, u = 0;
    kstring_t *str;
    int32_t old_nm_i = -1, nm = 0;

    // First pass: x walks the reference, y walks the read, u counts the
    // current run of matches, str collects the MD text and nm the differences.
    str = (kstring_t*)calloc(1, sizeof(kstring_t));
    for (i = y = 0, x = c->pos; i < c->n_cigar; ++i) {
        int j, l = cigar[i]>>4, op = cigar[i]&0xf;
        if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) {
            for (j = 0; j < l; ++j) {
                int c1, c2, z = y + j;
                if (x+j >= ref_len || ref[x+j] == '\0') break; // out of bounds
                c1 = bam_seqi(seq, z), c2 = seq_nt16_table[(int)ref[x+j]];
                if ((c1 == c2 && c1 != 15 && c2 != 15) || c1 == 0) { // a match
                    // with USE_EQUAL the 4-bit code of the matching base is cleared to 0
                    if (flag&USE_EQUAL) seq[z/2] &= (z&1)? 0xf0 : 0x0f;
                    ++u;
                } else {
                    // mismatch: emit the match run length and the reference base
                    kputw(u, str);
                    kputc(ref[x+j], str);
                    u = 0;
                    ++nm;
                }
            }
            if (j < l) break;
            x += l;
            y += l;
        } else if (op == BAM_CDEL) {
            // deletion: match run length, '^', then the deleted reference bases
            kputw(u, str);
            kputc('^', str);
            for (j = 0; j < l; ++j) {
                if (x+j >= ref_len || ref[x+j] == '\0') break;
                kputc(ref[x+j], str);
            }
            u = 0;
            x += j;
            nm += j;
            if (j < l) break;
        } else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) {
            // consumes read bases only; insertions also count towards NM
            y += l;
            if (op == BAM_CINS) nm += l;
        } else if (op == BAM_CREF_SKIP) {
            x += l;
        }
    }
    // the trailing match run closes the MD string
    kputw(u, str);
    // apply max_nm
    // Second pass: for every matching base set its 4-bit code to 0xf and its
    // quality to 0.
    if (max_nm > 0 && nm >= max_nm) {
        for (i = y = 0, x = c->pos; i < c->n_cigar; ++i) {
            int j, l = cigar[i]>>4, op = cigar[i]&0xf;
            if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) {
                for (j = 0; j < l; ++j) {
                    int c1, c2, z = y + j;
                    if (x+j >= ref_len || ref[x+j] == '\0') break; // out of bounds
                    c1 = bam_seqi(seq, z), c2 = seq_nt16_table[(int)ref[x+j]];
                    if ((c1 == c2 && c1 != 15 && c2 != 15) || c1 == 0) { // a match
                        seq[z/2] |= (z&1)? 0x0f : 0xf0;
                        bam_get_qual(b)[z] = 0;
                    }
                }
                if (j < l) break;
                x += l;
                y += l;
            } else if (op == BAM_CDEL || op == BAM_CREF_SKIP) x += l;
            else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) y += l;
        }
    }
    // update NM
    // (mapped reads only; an existing tag is replaced, with a message on
    // stderr, only when its value differs)
    if ((flag & UPDATE_NM) && !(c->flag & BAM_FUNMAP)) {
        uint8_t *old_nm = bam_aux_get(b, "NM");
        if (old_nm) old_nm_i = bam_aux2i(old_nm);
        if (!old_nm) bam_aux_append(b, "NM", 'i', 4, (uint8_t*)&nm);
        else if (nm != old_nm_i) {
            fprintf(stderr, "[bam_fillmd1] different NM for read '%s': %d -> %d\n", bam_get_qname(b), old_nm_i, nm);
            bam_aux_del(b, old_nm);
            bam_aux_append(b, "NM", 'i', 4, (uint8_t*)&nm);
        }
    }
    // update MD
    // (mapped reads only; the comparison with an existing tag ignores case,
    // and old_md+1 skips the type byte that precedes the stored string)
    if ((flag & UPDATE_MD) && !(c->flag & BAM_FUNMAP)) {
        uint8_t *old_md = bam_aux_get(b, "MD");
        if (!old_md) bam_aux_append(b, "MD", 'Z', str->l + 1, (uint8_t*)str->s);
        else {
            int is_diff = 0;
            if (strlen((char*)old_md+1) == str->l) {
                for (i = 0; i < str->l; ++i)
                    if (toupper(old_md[i+1]) != toupper(str->s[i]))
                        break;
                if (i < str->l) is_diff = 1;
            } else is_diff = 1;
            if (is_diff) {
                fprintf(stderr, "[bam_fillmd1] different MD for read '%s': '%s' -> '%s'\n", bam_get_qname(b), old_md+1, str->s);
                bam_aux_del(b, old_md);
                bam_aux_append(b, "MD", 'Z', str->l + 1, (uint8_t*)str->s);
            }
        }
    }

    // drop all tags but RG
    if (flag&DROP_TAG) {
        uint8_t *q = bam_aux_get(b, "RG");
        bam_aux_drop_other(b, q);
    }
    // reduce the resolution of base quality
    // (qualities of 3 or more become the start of their bin of ten plus 7)
    if (flag&BIN_QUAL) {
        uint8_t *qual = bam_get_qual(b);
        for (i = 0; i < b->core.l_qseq; ++i)
            if (qual[i] >= 3) qual[i] = qual[i]/10*10 + 7;
    }

    free(str->s);
    free(str);
}
Beispiel #21
0
// Constructor: sets defaults, reads the options from arguments, initialises
// the genotype-likelihood model selected by GL (1-4) and, when doGlf is set,
// opens the gzip output file. For doGlf==2 the beagle header line
// ("marker allele1 allele2" plus three "Ind<i>" columns per individual) is
// written immediately.
//
// outfiles  - output prefix handed to openFileGz
// arguments - parsed command line
// inputtype - not used in this constructor
likeClass::likeClass(const char *outfiles,argStruct *arguments,int inputtype){
  
  postfix = ".glf.gz";
  beaglepostfix = ".beagle.gz";
  

  // defaults, possibly overridden by getOptions() below
  trim =0;
  GL=0;
  doGlf=0;
  errorFname = NULL;
  errorProbs = NULL;
  GL=0;
  minQ = MINQ;//<- general.h
  minInd=0;
  angsd_tmpdir = strdup("angsd_tmpdir");
  
  // With a single argument: "-GL" prints the option summary and exits; any
  // other argument returns here.
  // NOTE(review): this early return happens before gzoutfile and bufstr are
  // initialised further down - confirm nothing reads them afterwards.
  if(arguments->argc==2){
    if(!strcmp(arguments->argv[1],"-GL")){
      printArg(stdout);
      exit(0);
    }else
      return;
  }

  getOptions(arguments);
  printArg(arguments->argumentFile);

  //  if(GL==0)
  //  return;
  // model-specific initialisation
  if(GL==1)
    bam_likes_init();
  else if(GL==2)
    gatk_init();
  else if(GL==3){
    soap.init(arguments->nInd,angsd_tmpdir);
    if(soap.doRecal)
      fprintf(stderr,"[%s] Will calculate recalibration matrices, please don't do any other analysis\n",__FILE__);
    else
      fprintf(stderr,"[%s] Will use precalculated calibration matrices\n",__FILE__);

  }else if(GL==4) {
    //default errormatrix
    double errorsDefault[4][4]={{0       ,0.00031 , 0.00373 , 0.000664},
				{0.000737,   0    , 0.000576, 0.001702},
				{0.001825,0.000386,    0    , 0.000653},
				{0.00066 ,0.003648, 0.000321,    0    },
    };
    //allocate and plug in default values
    errors = new double *[4];
    for(int i=0;i<4;i++){
      errors[i] = new double[4];
      for(int j=0;j<4;j++)
	errors[i][j] = errorsDefault[i][j];
    }
    // a user-supplied error file replaces the defaults
    if(errorFname!=NULL)
      readError(errors,errorFname);
    errorProbs = error::generateErrorPointers(errors,3,4);
  }
  
  gzoutfile = Z_NULL;
  bufstr.s=NULL; bufstr.l=bufstr.m=0;// <- used for buffered output 
  bufstr.l=0;
  if(doGlf){
 
    if(doGlf!=2)
      gzoutfile = openFileGz(outfiles,postfix,GZOPT);
    else{
      gzoutfile = openFileGz(outfiles,beaglepostfix,GZOPT);
      
      // beagle header: three columns per individual, all labelled Ind<i>
      kputs("marker\tallele1\tallele2",&bufstr);
      for(int i=0;i<arguments->nInd;i++){
	kputs("\tInd",&bufstr);
	kputw(i,&bufstr);
	kputs("\tInd",&bufstr);
	kputw(i,&bufstr);
	kputs("\tInd",&bufstr);
	kputw(i,&bufstr);
      }
      kputc('\n',&bufstr);
      gzwrite(gzoutfile,bufstr.s,bufstr.l);
    }
 
  }

}
Beispiel #22
0
/*
 * Rebuilds the sorter state for the records that the active readers hold at
 * position (chr, min_pos).
 *
 * Steps, in order:
 *   1. on the first call, set up the pairing logic, the scores and both hashes;
 *   2. clear the group and variant hashes left over from the previous position;
 *   3. for every active reader, turn each buffered record at this position
 *      into a variant string ("REF>ALT[,REF>ALT...]" or "REF>."), register
 *      the variant, and group readers that carry the same set of variants;
 *   4. fill the per-variant bitmask of the groups it occurs in;
 *   5. create one variant set per variant and zero the pairing matrix;
 *   6. repeatedly take the variant set with the highest count, merge it with
 *      the best-scoring set that shares no group with it, or push it out
 *      when no such partner exists.
 * Finally chr and min_pos are remembered in srt.
 */
static void bcf_sr_sort_set(bcf_srs_t *readers, sr_sort_t *srt, const char *chr, int min_pos)
{
    if ( !srt->grp_str2int )
    {
        // first time here, initialize
        if ( !srt->pair )
        {
            if ( readers->collapse==COLLAPSE_NONE ) readers->collapse = BCF_SR_PAIR_EXACT;
            bcf_sr_set_opt(readers, BCF_SR_PAIR_LOGIC, readers->collapse);
        }
        bcf_sr_init_scores(srt);
        srt->grp_str2int = khash_str2int_init();
        srt->var_str2int = khash_str2int_init();
    }
    // the hash keys were strdup'ed (or built by grp_create_key): free them before clearing
    int k;
    khash_t(str2int) *hash;
    hash = srt->grp_str2int;
    for (k=0; k < kh_end(hash); k++)
        if ( kh_exist(hash,k) ) free((char*)kh_key(hash,k));
    hash = srt->var_str2int;
    for (k=0; k < kh_end(hash); k++)
        if ( kh_exist(hash,k) ) free((char*)kh_key(hash,k));
    kh_clear(str2int, srt->grp_str2int);
    kh_clear(str2int, srt->var_str2int);
    srt->ngrp = srt->nvar = srt->nvset = 0;

    // scratch group, filled per reader and moved into srt->grp when it is new
    grp_t grp;
    memset(&grp,0,sizeof(grp_t));

    // group VCFs into groups, each with a unique combination of variants in the duplicate lines
    int ireader,ivar,irec,igrp,ivset,iact;
    for (ireader=0; ireader<readers->nreaders; ireader++) srt->vcf_buf[ireader].nrec = 0;
    for (iact=0; iact<srt->nactive; iact++)
    {
        ireader = srt->active[iact];
        bcf_sr_t *reader = &readers->readers[ireader];
        int rid   = bcf_hdr_name2id(reader->header, chr);
        grp.nvar  = 0;
        hts_expand(int,reader->nbuffer,srt->moff,srt->off);
        srt->noff  = 0;
        srt->str.l = 0;
        // buffered records are visited from index 1 up to and including nbuffer
        for (irec=1; irec<=reader->nbuffer; irec++)
        {
            bcf1_t *line = reader->buffer[irec];
            if ( line->rid!=rid || line->pos!=min_pos ) break;

            // append this record's variant string to srt->str, ';'-separated,
            // remembering its start offset in srt->off
            if ( srt->str.l ) kputc(';',&srt->str);
            srt->off[srt->noff++] = srt->str.l;
            size_t beg = srt->str.l;
            for (ivar=1; ivar<line->n_allele; ivar++)
            {
                if ( ivar>1 ) kputc(',',&srt->str);
                kputs(line->d.allele[0],&srt->str);
                kputc('>',&srt->str);
                kputs(line->d.allele[ivar],&srt->str);
            }
            if ( line->n_allele==1 )
            {
                kputs(line->d.allele[0],&srt->str);
                kputsn(">.",2,&srt->str);
            }

            // Create new variant or attach to existing one. But careful, there can be duplicate
            // records with the same POS,REF,ALT (e.g. in dbSNP-b142)
            char *var_str = beg + srt->str.s;
            int ret, var_idx = 0, var_end = srt->str.l;
            // while the same reader already contributed this variant, try the
            // key again with a numeric suffix (0, 1, ...) appended
            while ( 1 )
            {
                ret = khash_str2int_get(srt->var_str2int, var_str, &ivar);
                if ( ret==-1 ) break;

                var_t *var = &srt->var[ivar];
                if ( var->vcf[var->nvcf-1] != ireader ) break;

                srt->str.l = var_end;
                kputw(var_idx, &srt->str);
                var_str = beg + srt->str.s;
                var_idx++;
            }
            if ( ret==-1 )
            {
                ivar = srt->nvar++;
                hts_expand0(var_t,srt->nvar,srt->mvar,srt->var);
                srt->var[ivar].nvcf = 0;
                khash_str2int_set(srt->var_str2int, strdup(var_str), ivar);
                free(srt->var[ivar].str);   // possible left-over from the previous position
            }
            var_t *var = &srt->var[ivar];
            var->nalt = line->n_allele - 1;
            var->type = bcf_get_variant_types(line);
            // cut any numeric suffix off again before the string is stored
            srt->str.s[var_end] = 0;
            if ( ret==-1 )
                var->str = strdup(var_str);

            // record which reader and which line carry this variant;
            // var->rec is grown in step with var->vcf
            int mvcf = var->mvcf;
            var->nvcf++;
            hts_expand0(int*, var->nvcf, var->mvcf, var->vcf);
            if ( mvcf != var->mvcf ) var->rec = (bcf1_t **) realloc(var->rec,sizeof(bcf1_t*)*var->mvcf);
            var->vcf[var->nvcf-1] = ireader;
            var->rec[var->nvcf-1] = line;

            grp.nvar++;
            hts_expand(var_t,grp.nvar,grp.mvar,grp.var);
            grp.var[grp.nvar-1] = ivar;
        }
        // readers with the same key share a group; a new group takes
        // ownership of both the scratch grp contents and the key
        char *grp_key = grp_create_key(srt);
        int ret = khash_str2int_get(srt->grp_str2int, grp_key, &igrp);
        if ( ret==-1 )
        {
            igrp = srt->ngrp++;
            hts_expand0(grp_t, srt->ngrp, srt->mgrp, srt->grp);
            free(srt->grp[igrp].var);
            srt->grp[igrp] = grp;
            srt->grp[igrp].key = grp_key;
            khash_str2int_set(srt->grp_str2int, grp_key, igrp);
            memset(&grp,0,sizeof(grp_t));
        }
        else
            free(grp_key);
        srt->grp[igrp].nvcf++;
    }
    free(grp.var);

    // initialize bitmask - which groups is the variant present in
    for (ivar=0; ivar<srt->nvar; ivar++)
    {
        srt->var[ivar].mask = kbs_resize(srt->var[ivar].mask, srt->ngrp);
        kbs_clear(srt->var[ivar].mask);
    }
    for (igrp=0; igrp<srt->ngrp; igrp++)
    {
        for (ivar=0; ivar<srt->grp[igrp].nvar; ivar++)
        {
            int i = srt->grp[igrp].var[ivar];
            kbs_insert(srt->var[i].mask, igrp);
        }
    }

    // create the initial list of variant sets
    for (ivar=0; ivar<srt->nvar; ivar++)
    {
        ivset = srt->nvset++;
        hts_expand0(varset_t, srt->nvset, srt->mvset, srt->vset);

        varset_t *vset = &srt->vset[ivset];
        vset->nvar = 1;
        hts_expand0(var_t, vset->nvar, vset->mvar, vset->var);
        vset->var[vset->nvar-1] = ivar;
        var_t *var  = &srt->var[ivar];
        vset->cnt   = var->nvcf;
        vset->mask  = kbs_resize(vset->mask, srt->ngrp);
        kbs_clear(vset->mask);
        kbs_bitwise_or(vset->mask, var->mask);

        // translate the VCF_* variant type bits into SR_* bits (MNPs count as SNPs)
        int type = 0;
        if ( var->type==VCF_REF ) type |= SR_REF;
        else
        {
            if ( var->type & VCF_SNP ) type |= SR_SNP;
            if ( var->type & VCF_MNP ) type |= SR_SNP;
            if ( var->type & VCF_INDEL ) type |= SR_INDEL;
            if ( var->type & VCF_OTHER ) type |= SR_OTHER;
        }
        var->type = type;
    }
#if DEBUG_VSETS
    debug_vsets(srt);
#endif

    // initialize the pairing matrix
    hts_expand(int, srt->ngrp*srt->nvset, srt->mpmat, srt->pmat);
    hts_expand(int, srt->nvset, srt->mcnt, srt->cnt);
    memset(srt->pmat, 0, sizeof(*srt->pmat)*srt->ngrp*srt->nvset);
    for (ivset=0; ivset<srt->nvset; ivset++)
    {
        varset_t *vset = &srt->vset[ivset];
        for (igrp=0; igrp<srt->ngrp; igrp++) srt->pmat[ivset*srt->ngrp+igrp] = 0;
        srt->cnt[ivset] = vset->cnt;
    }

    // pair the lines
    while ( srt->nvset )
    {
#if DEBUG_VSETS
    fprintf(stderr,"\n");
    debug_vsets(srt);
#endif

        // imax: the variant set with the highest count (the first one on ties)
        int imax = 0;
        for (ivset=1; ivset<srt->nvset; ivset++)
            if ( srt->cnt[imax] < srt->cnt[ivset] ) imax = ivset;

        // ipair: the best-scoring set whose group mask does not overlap imax's
        int ipair = -1;
        uint32_t max_score = 0;
        for (ivset=0; ivset<srt->nvset; ivset++)
        {
            if ( kbs_logical_and(srt->vset[imax].mask,srt->vset[ivset].mask) ) continue;   // cannot be merged
            uint32_t score = pairing_score(srt, imax, ivset);
            // fprintf(stderr,"score: %d %d, logic=%d \t..\t %u\n", imax,ivset,srt->pair,score);
            if ( max_score < score ) { max_score = score; ipair = ivset; }
        }

        // merge rows creating a new variant set this way
        if ( ipair!=-1 && ipair!=imax )
        {
            imax = merge_vsets(srt, imax, ipair);
            continue;
        }

        // no partner left: hand the set over to push_vset
        push_vset(srt, imax);
    }

    srt->chr = chr;
    srt->pos = min_pos;
}
Beispiel #23
0
/**
 * Renders one alignment record as a single text line in `str`.
 *
 * `str` is emptied first and then receives, tab separated: the read name, the
 * upper 16 bits of flag_nc, the reference name ('*' when refID is -1), pos+1,
 * mapQ, the CIGAR string ('*' when there are no operations), the mate
 * reference ('*' when next_refID is -1, '=' when it equals refID), next_pos+1,
 * tlen, the decoded bases and the qualities shifted by +33. The auxiliary
 * fields are appended by printAuxBuffered() and the line is closed with '\n'.
 *
 * The process is terminated with a non-zero status when bam_validate1()
 * returns 0 for the record.
 */
void printReadBuffered(aRead &rd,aHead *hd,kstring_t &str) {
   // CIGAR operator letters in BAM encoding order. The previous table
   // ("MIDNSHP") stopped after 'P', so '=' printed the string terminator and
   // 'X'/'B' read past the literal. The '?' padding plus the bounds check
   // below keep every masked value inside the table.
   static const char cigarOps[] = "MIDNSHP=XB??????";
   const unsigned nCigarOps = sizeof(cigarOps)-1;

   str.l = 0;

   if(bam_validate1(hd,rd)==0){
     fprintf(stderr,"problems validateing\n");
     exit(1);// a rejected record is a failure, do not report success
   }

   //read name: stored at the start of vDat, l_qname includes the terminator
   kputsn((char *)rd.vDat,rd.l_qname-1,&str);kputc('\t', &str);
   kputw((int)rd.flag_nc>>16, &str); kputc('\t', &str);

   if(rd.refID==-1)//unmatched read
     kputc('*', &str);
   else
     kputs(hd->name[rd.refID] , &str);
   kputc('\t', &str);

   kputw(rd.pos+1, &str);   kputc('\t', &str);
   kputw(rd.mapQ, &str);kputc('\t', &str);

   int nCigs = rd.nCig;

   if(nCigs==0)
     kputc('*', &str);// if no cigars
   else{
     uint32_t *cigs =getCig(&rd);//fetched once, not once per operation
     for (int i = 0; i < nCigs; ++i) {//print cigars
       const unsigned op = cigs[i]&BAM_CIGAR_MASK;
       kputw(cigs[i]>>BAM_CIGAR_SHIFT, &str);
       kputc(op<nCigarOps ? cigarOps[op] : '?', &str);
     }
   }
   kputc('\t', &str);

   if(rd.next_refID==-1)
     kputc('*', &str);// no mate reference
   else if(rd.refID==rd.next_refID)
     kputc('=', &str);
   else
     kputs(hd->name[rd.next_refID] , &str);
   kputc('\t', &str);

   kputw(rd.next_pos+1, &str);   kputc('\t', &str);
   kputw(rd.tlen, &str);   kputc('\t', &str);

   //start seq
   char *seq = (char *)getSeq(&rd);
   for(int i=0;i<rd.l_seq;i++)
     kputc(bam_nt16_rev_table2[bam1_seqi(seq, i)], &str);

   kputc('\t', &str);

   char *quals =(char *)getQuals(&rd);
   for(int i=0;i<rd.l_seq;i++)
     kputc(quals[i]+33, &str);

   //below is taken directly from samtools,(not to steal, to preserve ordering etc, all credits go where credit is due)
   //from aux start to the last memadrs in chunk
   printAuxBuffered(getAuxStart(&rd),rd.vDat+rd.block_size,str);
   kputc('\n', &str);
}
Beispiel #24
0
// Writes the likelihoods of all kept sites (keepSites[site]!=0) in the layout
// selected by doGlf:
//   1 - raw dump of pars->likes[site], 10 doubles per individual
//   2 - beagle text, three normalised values per individual taken from lh3
//   3 - three doubles per individual (major/major, major/minor, minor/minor)
//       to gzoutfile plus one text line per site to gzoutfile2
//   4 - text, all 10 values per individual
// Any other doGlf value writes nothing.
void abcGL::printLike(funkyPars *pars) {
  assert(pars->likes!=NULL);

  switch(doGlf){

  case 1: {
    const size_t bytesPerSite = sizeof(double)*10*pars->nInd;
    for(int site=0;site<pars->numSites;site++)
      if(pars->keepSites[site]!=0)
        aio::bgzf_write(gzoutfile,pars->likes[site],bytesPerSite);
    break;
  }

  case 2: {
    //beagle format: the whole chunk is assembled in bufstr and written once
    bufstr.l = 0;
    for(int site=0;site<pars->numSites;site++) {
      lh3struct *gl = (lh3struct*) pars->extras[index+1];
      if(pars->keepSites[site]==0||gl->hasAlloced[site]==0)
        continue;

      //marker id "<name>_<posi+1>" followed by the major and minor codes
      kputs(header->target_name[pars->refId],&bufstr);
      kputc('_',&bufstr);
      kputw(pars->posi[site]+1,&bufstr);
      kputc('\t',&bufstr);
      kputw(pars->major[site],&bufstr);
      kputc('\t',&bufstr);
      kputw(pars->minor[site],&bufstr);
      assert(pars->major[site]!=4&&pars->minor[site]!=4);

      for(int ind=0;ind<pars->nInd;ind++) {
        double prob[3];
        for(int g=0;g<3;g++)
          prob[g] = exp(gl->lh3[site][ind*3+g]);
        angsd::norm(prob,3);
        for(int g=0;g<3;g++)
          ksprintf(&bufstr, "\t%f",prob[g]);
      }

      if(bufstr.l!=0)
        kputc('\n',&bufstr);
    }
    aio::bgzf_write(gzoutfile,bufstr.s,bufstr.l);
    bufstr.l=0;
    break;
  }

  case 3: {
    for(int site=0;site<pars->numSites;site++) {
      if(pars->keepSites[site]==0)
        continue;
      const char maj = pars->major[site];
      const char mnr = pars->minor[site];
      assert(maj!=4&&mnr!=4);

      //binary part: the three genotypes made of the major and minor allele
      for(int ind=0;ind<pars->nInd;ind++) {
        const int first = ind*10;
        double triple[3];
        triple[0] = pars->likes[site][first+angsd::majorminor[maj][maj]];
        triple[1] = pars->likes[site][first+angsd::majorminor[maj][mnr]];
        triple[2] = pars->likes[site][first+angsd::majorminor[mnr][mnr]];
        aio::bgzf_write(gzoutfile,triple,3*sizeof(double));
      }

      //text part: one line per site, flushed immediately to the second file
      bufstr.l=0;
      ksprintf(&bufstr,"%s\t%d\t",header->target_name[pars->refId],pars->posi[site]+1);
      ksprintf(&bufstr,"%c\t%c\n",intToRef[maj],intToRef[mnr]);
      aio::bgzf_write(gzoutfile2,bufstr.s,bufstr.l);
      bufstr.l=0;
    }
    break;
  }

  case 4: {
    //plain text: name, posi+1 and every likelihood of the site
    bufstr.l=0;
    const int perSite = 10*pars->nInd;
    for(int site=0;site<pars->numSites;site++){
      if(pars->keepSites[site]==0)
        continue;
      kputs(header->target_name[pars->refId],&bufstr);
      kputc('\t',&bufstr);
      kputw(pars->posi[site]+1,&bufstr);
      for(int k=0;k<perSite;k++)
        ksprintf(&bufstr, "\t%f",pars->likes[site][k]);
      kputc('\n',&bufstr);
    }
    aio::bgzf_write(gzoutfile,bufstr.s,bufstr.l);
    bufstr.l=0;
    break;
  }

  default:
    break;
  }
}
Beispiel #25
0
/*
 * Walks the synchronized readers in args->files position by position and
 * outputs the records selected by args->isec_op.
 *
 * Output goes to one of three places:
 *   - a single VCF/BCF stream on standard output, when exactly one file is to
 *     be written or a targets list is used with a single reader;
 *   - args->fh_sites, one text line per position: sequence name, pos+1, the
 *     first allele, the remaining alleles comma separated, and one '0'/'1'
 *     character per reader telling whether it has the record;
 *   - the per-file handles in args->fh_out, when args->prefix is set.
 *
 * The program exits with status 1 if standard output cannot be opened.
 */
void isec_vcf(args_t *args)
{
    bcf_srs_t *files = args->files;
    kstring_t str = {0,0,0};
    htsFile *out_fh = NULL;

    // When only one VCF is output, print VCF to stdout
    int out_std = 0;
    if ( args->nwrite==1 ) out_std = 1;
    if ( args->targets_list && files->nreaders==1 ) out_std = 1;
    if ( out_std )
    {
        out_fh = hts_open("-",hts_bcf_wmode(args->output_type));
        if ( !out_fh )
        {
            // the handle is used right below, a NULL must not get that far
            fprintf(stderr,"Can't write to standard output\n");
            exit(1);
        }
        bcf_hdr_append_version(files->readers[args->iwrite].header,args->argc,args->argv,"bcftools_isec");
        bcf_hdr_write(out_fh, files->readers[args->iwrite].header);
    }
    if ( !args->nwrite && !out_std && !args->prefix )
        fprintf(stderr,"Note: -w option not given, printing list of sites...\n");

    // largest shift that is still defined for a signed int
    const int max_shift = (int)(8*sizeof(int)) - 2;

    int n;
    while ( (n=bcf_sr_next_line(files)) )
    {
        bcf_sr_t *reader = NULL;
        bcf1_t *line = NULL;
        int i, ret = 0;
        for (i=0; i<files->nreaders; i++)
        {
            if ( !bcf_sr_has_line(files,i) ) continue;
            if ( !line )
            {
                // the first reader holding the record supplies the site description
                line = files->readers[i].buffer[0];
                reader = &files->readers[i];
            }
            // presence bitmask; only read for OP_VENN, which involves two files,
            // so higher readers are left out instead of shifting out of range
            if ( i <= max_shift ) ret |= 1<<i;
        }

        switch (args->isec_op)
        {
            case OP_COMPLEMENT: if ( n!=1 || !bcf_sr_has_line(files,0) ) continue; break;
            case OP_EQUAL: if ( n != args->isec_n ) continue; break;
            case OP_PLUS: if ( n < args->isec_n ) continue; break;
            case OP_MINUS: if ( n > args->isec_n ) continue; break;
        }

        if ( out_std )
        {
            if ( bcf_sr_has_line(files,args->iwrite) )
                bcf_write1(out_fh, files->readers[args->iwrite].header, files->readers[args->iwrite].buffer[0]);
            continue;
        }
        else if ( args->fh_sites )
        {
            str.l = 0;
            kputs(reader->header->id[BCF_DT_CTG][line->rid].key, &str); kputc('\t', &str);
            kputw(line->pos+1, &str); kputc('\t', &str);
            if (line->n_allele > 0) kputs(line->d.allele[0], &str);
            else kputc('.', &str);
            kputc('\t', &str);
            if (line->n_allele > 1) kputs(line->d.allele[1], &str);
            else kputc('.', &str);
            for (i=2; i<line->n_allele; i++)
            {
                kputc(',', &str);
                kputs(line->d.allele[i], &str);
            }
            kputc('\t', &str);
            for (i=0; i<files->nreaders; i++)
                kputc(bcf_sr_has_line(files,i)?'1':'0', &str);
            kputc('\n', &str);
            fwrite(str.s,sizeof(char),str.l,args->fh_sites);
        }

        if ( args->prefix )
        {
            if ( args->isec_op==OP_VENN )
                bcf_write1(args->fh_out[ret-1], reader->header, line);
            else
            {
                for (i=0; i<files->nreaders; i++)
                {
                    if ( !bcf_sr_has_line(files,i) ) continue;
                    if ( args->write && !args->write[i] ) continue;
                    bcf_write1(args->fh_out[i], files->readers[i].header, files->readers[i].buffer[0]);
                }
            }
        }
    }
    free(str.s);
    if ( out_fh ) hts_close(out_fh);
}
Beispiel #26
0
// Writes the likelihoods of all kept sites (keepSites[site]!=0) to gzoutfile
// in the layout selected by doGlf:
//   1 - raw dump of pars->likes[site], 10 doubles per individual
//   2 - beagle text, three normalised values per individual
//   3 - three doubles per individual (major/major, major/minor, minor/minor)
//   4 - text, all 10 values per individual
// Any other doGlf value writes nothing.
void likeClass::printLike(funkyPars *pars) {
  assert(pars->likes!=NULL);

  switch(doGlf){

  case 1: {
    const size_t bytesPerSite = sizeof(double)*10*pars->nInd;
    for(int site=0;site<pars->numSites;site++)
      if(pars->keepSites[site]!=0)
        gzwrite(gzoutfile,pars->likes[site],bytesPerSite);
    break;
  }

  case 2: {
    //beagle format: the whole chunk is assembled in bufstr and written once
    bufstr.l = 0;
    for(int site=0;site<pars->numSites;site++) {
      if(pars->keepSites[site]==0)
        continue;

      //marker id "<name>_<posi+1>" followed by the major and minor codes
      kputs(header->name[pars->refId],&bufstr);
      kputc('_',&bufstr);
      kputw(pars->posi[site]+1,&bufstr);
      kputc('\t',&bufstr);
      kputw(pars->major[site],&bufstr);
      kputc('\t',&bufstr);
      kputw(pars->minor[site],&bufstr);

      const int maj = pars->major[site];
      const int mnr = pars->minor[site];
      assert(maj!=4&&mnr!=4);

      for(int ind=0;ind<pars->nInd;ind++) {
        const int first = ind*10;
        //exponentiate the three genotypes once, then scale them to sum to one
        double prob[3];
        prob[0] = exp(pars->likes[site][first+angsd::majorminor[maj][maj]]);
        prob[1] = exp(pars->likes[site][first+angsd::majorminor[maj][mnr]]);
        prob[2] = exp(pars->likes[site][first+angsd::majorminor[mnr][mnr]]);
        const double total = prob[0]+prob[1]+prob[2];
        for(int g=0;g<3;g++)
          ksprintf(&bufstr, "\t%f",prob[g]/total);
      }

      kputc('\n',&bufstr);
    }
    gzwrite(gzoutfile,bufstr.s,bufstr.l);
    break;
  }

  case 3: {
    for(int site=0;site<pars->numSites;site++) {
      if(pars->keepSites[site]==0)
        continue;
      const int maj = pars->major[site];
      const int mnr = pars->minor[site];
      assert(maj!=4&&mnr!=4);

      for(int ind=0;ind<pars->nInd;ind++) {
        const int first = ind*10;
        double triple[3];
        triple[0] = pars->likes[site][first+angsd::majorminor[maj][maj]];
        triple[1] = pars->likes[site][first+angsd::majorminor[maj][mnr]];
        triple[2] = pars->likes[site][first+angsd::majorminor[mnr][mnr]];
        gzwrite(gzoutfile,triple,3*sizeof(double));
      }
    }
    break;
  }

  case 4: {
    //plain text: name, posi+1 and every likelihood of the site
    bufstr.l=0;
    const int perSite = 10*pars->nInd;
    for(int site=0;site<pars->numSites;site++){
      if(pars->keepSites[site]==0)
        continue;
      kputs(header->name[pars->refId],&bufstr);
      kputc('\t',&bufstr);
      kputw(pars->posi[site]+1,&bufstr);
      for(int k=0;k<perSite;k++)
        ksprintf(&bufstr, "\t%f",pars->likes[site][k]);
      kputc('\n',&bufstr);
    }
    gzwrite(gzoutfile,bufstr.s,bufstr.l);
    break;
  }

  default:
    break;
  }
}
Beispiel #27
0
/*
 * Walks the synchronized readers in args->files position by position, applies
 * the optional per-file filters and outputs the records selected by
 * args->isec_op.
 *
 * Output goes to one of three places:
 *   - a single VCF/BCF stream (args->output_fname or standard output), when
 *     exactly one file is to be written without a prefix, or a targets list is
 *     used with a single reader;
 *   - args->fh_sites, one text line per position: sequence name, pos+1, the
 *     first allele, the remaining alleles comma separated, and one '0'/'1'
 *     character per reader telling whether it has the record;
 *   - the per-file handles in args->fh_out, when args->prefix is set.
 *
 * Failures to open, write or close an output are reported through error().
 */
void isec_vcf(args_t *args)
{
    bcf_srs_t *files = args->files;
    kstring_t str = {0,0,0};
    htsFile *out_fh = NULL;
    const char *out_name = args->output_fname ? args->output_fname : "standard output";

    // When only one VCF is output, print VCF to pysam_stdout or -o file
    int out_std = 0;
    if ( args->nwrite==1 && !args->prefix ) out_std = 1;
    if ( args->targets_list && files->nreaders==1 ) out_std = 1;
    if ( out_std )
    {
        out_fh = hts_open(args->output_fname? args->output_fname : "-",hts_bcf_wmode(args->output_type));
        if ( out_fh == NULL ) error("Can't write to %s: %s\n", out_name, strerror(errno));
        if ( args->n_threads ) hts_set_threads(out_fh, args->n_threads);
        if (args->record_cmd_line) bcf_hdr_append_version(files->readers[args->iwrite].header,args->argc,args->argv,"bcftools_isec");
        if ( bcf_hdr_write(out_fh, files->readers[args->iwrite].header)!=0 )
            error("Error: cannot write the header to %s\n", out_name);
    }
    if ( !args->nwrite && !out_std && !args->prefix )
        fprintf(pysam_stderr,"Note: -w option not given, printing list of sites...\n");

    // largest shift that is still defined for a signed int
    const int max_shift = (int)(8*sizeof(int)) - 2;

    int n;
    while ( (n=bcf_sr_next_line(files)) )
    {
        bcf_sr_t *reader = NULL;
        bcf1_t *line = NULL;
        int i, ret = 0;
        for (i=0; i<files->nreaders; i++)
        {
            if ( !bcf_sr_has_line(files,i) ) continue;

            if ( args->nflt && args->flt[i] )
            {
                bcf1_t *rec = bcf_sr_get_line(files, i);
                int pass = filter_test(args->flt[i], rec, NULL);
                if ( args->flt_logic[i] & FLT_EXCLUDE ) pass = pass ? 0 : 1;
                if ( !pass )
                {
                    // a filtered record counts as absent from this reader
                    files->has_line[i] = 0;
                    n--;
                    continue;
                }
            }

            if ( !line )
            {
                // the first reader holding the record supplies the site description
                line = files->readers[i].buffer[0];
                reader = &files->readers[i];
            }
            // presence bitmask; only read for OP_VENN, which involves two files,
            // so higher readers are left out instead of shifting out of range
            if ( i <= max_shift ) ret |= 1<<i;
        }

        // Every record at this position was filtered out: nothing can be
        // written, and line/reader are NULL and must not be dereferenced below.
        if ( !line ) continue;

        switch (args->isec_op)
        {
            case OP_COMPLEMENT: if ( n!=1 || !bcf_sr_has_line(files,0) ) continue; break;
            case OP_EQUAL: if ( n != args->isec_n ) continue; break;
            case OP_PLUS: if ( n < args->isec_n ) continue; break;
            case OP_MINUS: if ( n > args->isec_n ) continue; break;
            case OP_EXACT:
                for (i=0; i<files->nreaders; i++)
                    if ( files->has_line[i] != args->isec_exact[i] ) break;
                if ( i<files->nreaders ) continue;
                break;
        }

        if ( out_std )
        {
            if ( bcf_sr_has_line(files,args->iwrite)
                 && bcf_write1(out_fh, files->readers[args->iwrite].header, files->readers[args->iwrite].buffer[0])!=0 )
                error("Error: cannot write to %s\n", out_name);
            continue;
        }
        else if ( args->fh_sites )
        {
            str.l = 0;
            kputs(reader->header->id[BCF_DT_CTG][line->rid].key, &str); kputc('\t', &str);
            kputw(line->pos+1, &str); kputc('\t', &str);
            if (line->n_allele > 0) kputs(line->d.allele[0], &str);
            else kputc('.', &str);
            kputc('\t', &str);
            if (line->n_allele > 1) kputs(line->d.allele[1], &str);
            else kputc('.', &str);
            for (i=2; i<line->n_allele; i++)
            {
                kputc(',', &str);
                kputs(line->d.allele[i], &str);
            }
            kputc('\t', &str);
            for (i=0; i<files->nreaders; i++)
                kputc(bcf_sr_has_line(files,i)?'1':'0', &str);
            kputc('\n', &str);
            if ( fwrite(str.s,sizeof(char),str.l,args->fh_sites) != str.l )
                error("Error: cannot write the list of sites\n");
        }

        if ( args->prefix )
        {
            if ( args->isec_op==OP_VENN && ret==3 )
            {
                // record shared by both files: each file's own version goes to its own output
                if ( !args->nwrite || args->write[0] )
                {
                    if ( bcf_write1(args->fh_out[2], bcf_sr_get_header(files,0), bcf_sr_get_line(files,0))!=0 )
                        error("Error: cannot write the record\n");
                }
                if ( !args->nwrite || args->write[1] )
                {
                    if ( bcf_write1(args->fh_out[3], bcf_sr_get_header(files,1), bcf_sr_get_line(files,1))!=0 )
                        error("Error: cannot write the record\n");
                }
            }
            else
            {
                for (i=0; i<files->nreaders; i++)
                {
                    if ( !bcf_sr_has_line(files,i) ) continue;
                    if ( args->write && !args->write[i] ) continue;
                    if ( bcf_write1(args->fh_out[i], files->readers[i].header, files->readers[i].buffer[0])!=0 )
                        error("Error: cannot write the record\n");
                }
            }
        }
    }
    free(str.s);
    if ( out_fh && hts_close(out_fh)!=0 ) error("Error: close failed .. %s\n", out_name);
}
Beispiel #28
0
/**
 * Gets the base in the read that is mapped to a genomic position.
 * Extracts the read sequence and qualities too.
 *
 * @param srec      alignment record to inspect
 * @param pos       reference position to look up, in the same coordinates as srec->core.pos
 * @param base      set to the read base aligned to pos, 'N' when there is none
 * @param qual      set to the matching character of the quality string, 0 when there is none
 * @param rpos      set to the index of that base within the read, or to BAM_READ_INDEX_NA
 *                  when pos falls in a deletion/skip or is not covered by the alignment
 * @param readseq   filled through bam_get_seq_string() when a base is found
 * @param readqual  filled through bam_get_qual_string() when a base is found
 *
 * A record without CIGAR operations leaves base='N', qual=0 and rpos=0.
 */
void bam_get_base_and_qual_and_read_and_qual(bam1_t *srec, uint32_t pos, char& base, char& qual, int32_t& rpos, kstring_t* readseq, kstring_t* readqual)
{
    bam1_core_t *c = &srec->core;
    const int32_t rlen = c->l_qseq;
    uint32_t cpos = c->pos; //reference coordinates of the first mapped base
    rpos = 0; //read coordinates
    base = 'N';
    qual = 0;

    if (!c->n_cigar)
    {
        return;
    }

    const uint32_t *cigar = bam_get_cigar(srec);
    bool located = false; //true once the operation covering pos has been found

    for (uint32_t i = 0; i < c->n_cigar && !located; ++i)
    {
        const char op = bam_cigar_opchr(cigar[i]);
        //the length is read directly; formatting it into a scratch kstring and
        //parsing it back leaked that buffer on every call
        const uint32_t len = bam_cigar_oplen(cigar[i]);
        //written as pos-cpos<len so that a zero length operation cannot wrap around
        const bool covers = pos>=cpos && pos-cpos<len;

        if (op=='M' || op=='=' || op=='X')
        {
            //consumes read and reference
            if (covers)
            {
                rpos += pos-cpos;
                located = true;
            }
            else
            {
                cpos += len;
                rpos += len;
            }
        }
        else if (op=='D' || op=='N')
        {
            //consumes reference only: no read base is aligned to pos
            if (covers)
            {
                rpos = -1;
                located = true;
            }
            else
            {
                cpos += len;
            }
        }
        else if (op=='S' || op=='I')
        {
            //consumes read only
            rpos += len;
        }
    }

    //rpos==rlen is one past the last base, hence the strict comparison
    if (located && rpos>=0 && rpos<rlen)
    {
        //sequence
        bam_get_seq_string(srec, readseq);
        base = readseq->s[rpos];

        //qual
        bam_get_qual_string(srec, readqual);
        qual = readqual->s[rpos];
    }
    else
    {
        rpos = BAM_READ_INDEX_NA;
    }
}