/**
 * Tallies one VNTR record in the tree.
 *
 * The VNTR fields of the variant (motif, basis, exact and fuzzy boundaries)
 * are expected to be filled in already; variants of any other type are ignored.
 *
 * A motif not present in motif_map gets a new node, which is registered in
 * motif_map and appended to vntrs[basis length - 1][motif length - basis length].
 * The node's exact_count or fuzzy_count is then incremented.
 */
void VNTRTree::count(Variant& variant)
{
    if (variant.type != VT_VNTR)
    {
        return;
    }

    VNTR& vntr = variant.vntr;
    VNTRNode* node = NULL;

    if (motif_map.find(vntr.motif)!=motif_map.end())
    {
        node = motif_map[vntr.motif];
    }
    else
    {
        // first sighting of this motif: create its node and file it by basis/motif length
        node = new VNTRNode(vntr.motif, vntr.basis, 0, 0);
        motif_map[vntr.motif] = node;

        int32_t basis_len = vntr.basis.size();
        int32_t motif_len = vntr.motif.size();
        const int32_t row = basis_len-1;
        const int32_t col = motif_len-basis_len;

        if (vntrs[row].size()<col+1)
        {
            vntrs[row].resize(col+1);
        }
        vntrs[row][col].push_back(node);
    }

    // the first INFO tag that yields a value supplies the concordance; -1 if none does
    static const char* const score_tags[] = {"SCORE", "CONCORDANCE", "TRF_SCORE"};
    float concordance = -1;
    float *score = NULL;
    int32_t n = 0;
    for (int32_t t=0; t<3; ++t)
    {
        if (bcf_get_info_float(variant.h, variant.v, score_tags[t], &score, &n)>0)
        {
            concordance = score[0];
            free(score);
            break;
        }
    }

    // exact: perfect concordance, or a fuzzy region is set and coincides with the exact region
    bool is_exact = (concordance==1);
    if (!is_exact)
    {
        is_exact = (vntr.fuzzy_beg1!=0 &&
                    vntr.exact_beg1==vntr.fuzzy_beg1 &&
                    vntr.exact_end1==vntr.fuzzy_end1);
    }

    if (is_exact)
    {
        ++node->exact_count;
    }
    else
    {
        ++node->fuzzy_count;
    }
}
/*
 *  Reduce the number of buffered sites to buf->prune.max_sites.
 *
 *  @buf:        the site buffer
 *  @flush_all:  if non-zero all buf->rbuf.n records are candidates, otherwise
 *               the last record in rbuf iteration order is left out
 *
 *  Each candidate gets an allele frequency (cached in vcf[i].af / af_set):
 *  taken from the INFO tag buf->prune.af_tag when one is configured, otherwise
 *  computed from the allele counts returned by bcf_calc_ac(). Candidates are
 *  then sorted with cmpvrec and the first (nbuf - max_sites) of them are
 *  removed from the ring buffer. Does nothing when there is nothing to prune.
 */
static void _prune_sites(vcfbuf_t *buf, int flush_all)
{
    int nbuf = flush_all ? buf->rbuf.n : buf->rbuf.n - 1;

    // grow the scratch arrays; they are kept in buf->prune and reused across calls
    if ( nbuf > buf->prune.mvrec )
    {
        buf->prune.idx   = (int*) realloc(buf->prune.idx, nbuf*sizeof(int));
        buf->prune.vrec  = (vcfrec_t**) realloc(buf->prune.vrec, nbuf*sizeof(vcfrec_t*));
        buf->prune.mvrec = nbuf;
    }

    // set allele frequency and prepare buffer for sorting
    int i,k,irec = 0;
    for (i=-1; rbuf_next(&buf->rbuf,&i) && irec<nbuf; )
    {
        bcf1_t *line = buf->vcf[i].rec;
        if ( line->n_allele > buf->prune.mac )
        {
            buf->prune.ac  = (int*) realloc(buf->prune.ac, line->n_allele*sizeof(*buf->prune.ac));
            buf->prune.mac = line->n_allele;
        }
        if ( !buf->vcf[i].af_set )
        {
            buf->vcf[i].af = 0;
            if ( buf->prune.af_tag )
            {
                if ( bcf_get_info_float(buf->hdr,line,buf->prune.af_tag,&buf->prune.farr, &buf->prune.mfarr) > 0 )
                    buf->vcf[i].af = buf->prune.farr[0];
            }
            else if ( bcf_calc_ac(buf->hdr, line, buf->prune.ac, BCF_UN_INFO|BCF_UN_FMT) )
            {
                // ac[0] is the REF allele count, so the denominator must be the
                // sum over all alleles; dividing by ac[0] alone would give the
                // ALT/REF odds and AF=0 for sites with no REF alleles
                int ntot = 0, nalt = 0;
                for (k=0; k<line->n_allele; k++) ntot += buf->prune.ac[k];
                nalt = ntot - buf->prune.ac[0];
                buf->vcf[i].af = ntot ? (float)nalt/ntot : 0;
            }
            buf->vcf[i].af_set = 1;
        }
        buf->vcf[i].idx = irec;     // position of the record in rbuf order, used for removal below
        buf->prune.vrec[irec++] = &buf->vcf[i];
    }

    // nothing to remove; also keeps a non-positive count away from qsort,
    // where it would be converted to a huge size_t
    int nprune = nbuf - buf->prune.max_sites;
    if ( nprune <= 0 ) return;

    // sort by allele frequency, low AF will be removed preferentially
    qsort(buf->prune.vrec, nbuf, sizeof(*buf->prune.vrec), cmpvrec);

    // sort the rbuf indexes to be pruned descendently so that j-th rbuf index
    // is removed before i-th index if i<j
    for (i=0; i<nprune; i++)
        buf->prune.idx[i] = buf->prune.vrec[i]->idx;

    qsort(buf->prune.idx, nprune, sizeof(int), cmpint_desc);

    for (i=0; i<nprune; i++)
        rbuf_remove_kth(&buf->rbuf, vcfrec_t, buf->prune.idx[i], buf->vcf);
}
static int update_bcf1(call_t *call, bcf1_t *rec, const bcf_p1rst_t *pr, double em[10]) { int has_I16, is_var; float fq, r; anno16_t a; float tmpf[4], tmpi; bcf_get_info_float(call->hdr, rec, "I16", &call->anno16, &call->n16); has_I16 = test16(call->anno16, &a) >= 0? 1 : 0; // print EM if (em[0] >= 0) { tmpf[0] = 1 - em[0]; bcf_update_info_float(call->hdr, rec, "AF1", tmpf, 1); } if (em[4] >= 0 && em[4] <= 0.05) { tmpf[0] = em[3]; tmpf[1] = em[2]; tmpf[2] = em[1]; tmpf[3] = em[4]; bcf_update_info_float(call->hdr, rec, "G3", tmpf, 3); bcf_update_info_float(call->hdr, rec, "HWE", &tmpf[3], 1); } if (em[5] >= 0 && em[6] >= 0) { tmpf[0] = 1 - em[5]; tmpf[1] = 1 - em[6]; bcf_update_info_float(call->hdr, rec, "AF2", tmpf, 2); } if (em[7] >= 0) { tmpf[0] = em[7]; bcf_update_info_float(call->hdr, rec, "LRT", tmpf, 1); } if (em[8] >= 0) { tmpf[0] = em[8]; bcf_update_info_float(call->hdr, rec, "LRT2", tmpf, 1); } bcf_p1aux_t *p1 = call->cdat->p1; if (p1->cons_llr > 0) { tmpi = p1->cons_llr; bcf_update_info_int32(call->hdr, rec, "CLR", &tmpi, 1); // todo: trio calling with -c if (p1->cons_gt > 0) { char tmp[4]; tmp[0] = p1->cons_gt&0xff; tmp[1] = p1->cons_gt>>8&0xff; tmp[2] = p1->cons_gt>>16&0xff; tmp[3] = 0; bcf_update_info_string(call->hdr, rec, "UGT", tmp); tmp[0] = p1->cons_gt>>32&0xff; tmp[1] = p1->cons_gt>>40&0xff; tmp[2] = p1->cons_gt>>48&0xff; bcf_update_info_string(call->hdr, rec, "CGT", tmp); }
/** * Evaluates the actions for this node. */ void Node::evaluate(bcf_hdr_t *h, bcf1_t *v, Variant *variant, bool debug) { if (debug) std::cerr << "evaluation " << type << "\n"; if (type&VT_LOGIC_OP) { if (type==VT_NOT) { if (debug) std::cerr << "\tVT_NOT " << left->value << " \n"; value = !(left->value); } else if (type==VT_AND) { if (debug) std::cerr << "\tVT_AND " << left->value << "&" << right->value << " \n"; value = (left->value && right->value); } else if (type==VT_OR) { value = (left->value || right->value); } } else if (type&VT_MATH_CMP) { if (type==VT_EQ) { if ((left->type&VT_INT)) { if ((right->type&VT_INT)) { if (debug) std::cerr << "\tVT_EQ " << left->i << "&" << right->i << " \n"; value = (left->i==right->i); return; } else if ((right->type&VT_FLT)) { if (debug) std::cerr << "\tVT_EQ " << left->i << "&" << right->f << " \n"; value = (left->i==right->f); return; } } else if ((left->type&VT_FLT)) { if ((right->type&VT_INT)) { if (debug) std::cerr << "\tVT_EQ " << left->f << "&" << right->i << " \n"; value = (left->f==right->i); return; } else if ((right->type&VT_FLT)) { if (debug) std::cerr << "\tVT_EQ " << left->f << "&" << right->f << " \n"; value = (left->f==right->f); return; } } else if ((left->type&VT_STR) && (right->type&VT_STR)) { if (debug) std::cerr << "\tVT_EQ " << left->tag.s << "&" << right->tag.s << " \n"; value = strcmp(left->tag.s, right->tag.s)==0 ? 
true : false; return; } fprintf(stderr, "[%s:%d %s] evaluation not supported : == %d %d\n", __FILE__, __LINE__, __FUNCTION__, left->type, right->type); exit(1); } else if (type==VT_NE) { if ((left->type&VT_INT)) { if ((right->type&VT_INT)) { value = (left->i!=right->i); return; } else if ((right->type&VT_FLT)) { value = (left->i!=right->f); return; } } else if ((left->type&VT_FLT)) { if ((right->type&VT_INT)) { value = (left->f!=right->i); return; } else if ((right->type&VT_FLT)) { value = (left->f!=right->f); return; } } else if ((left->type&VT_STR) && (right->type&VT_STR)) { value = strcmp(left->tag.s, right->tag.s)==0 ? false : true; return; } fprintf(stderr, "[%s:%d %s] evaluation not supported: %d %d: !=\n", __FILE__, __LINE__, __FUNCTION__, left->type, right->type); exit(1); } else if (type==VT_LE) { if ((left->type&VT_INT)) { if ((right->type&VT_INT)) { value = (left->i<=right->i); return; } else if ((right->type&VT_FLT)) { value = (left->i<=right->f); return; } } else if ((left->type&VT_FLT)) { if ((right->type&VT_INT)) { type |= VT_INT; value = (left->f<=right->i); return; } else if ((right->type&VT_FLT)) { value = (left->f<=right->f); return; } } else if ((left->type&VT_STR) && (right->type&VT_STR)) { value = strcmp(left->tag.s, right->tag.s)<=0 ? true : false; return; } fprintf(stderr, "[%s:%d %s] evaluation not supported: %d %d: <=\n", __FILE__, __LINE__, __FUNCTION__, left->type, right->type); exit(1); } else if (type==VT_GE) { if ((left->type&VT_INT)) { if ((right->type&VT_INT)) { value = (left->i>=right->i); return; } else if ((right->type&VT_FLT)) { value = (left->i>=right->f); return; } } else if ((left->type&VT_FLT)) { if ((right->type&VT_INT)) { value = (left->f>=right->i); return; } else if ((right->type&VT_FLT)) { value = (left->f>=right->f); return; } } else if ((left->type&VT_STR) && (right->type&VT_STR)) { value = strcmp(left->tag.s, right->tag.s)>=0 ? 
true : false; return; } fprintf(stderr, "[%s:%d %s] evaluation not supported: %d %d: >=\n", __FILE__, __LINE__, __FUNCTION__, left->type, right->type); exit(1); } else if (type==VT_GT) { if ((left->type&VT_INT)) { if ((right->type&VT_INT)) { value = (left->i>right->i); return; } else if ((right->type&VT_FLT)) { value = (left->i>right->f); return; } } else if ((left->type&VT_FLT)) { if ((right->type&VT_INT)) { value = (left->f>right->i); return; } else if ((right->type&VT_FLT)) { value = (left->f>right->f); return; } } else if ((left->type&VT_STR) && (right->type&VT_STR)) { value = strcmp(left->tag.s, right->tag.s)>0 ? true : false; return; } fprintf(stderr, "[%s:%d %s] evaluation not supported: %d %d: >\n", __FILE__, __LINE__, __FUNCTION__, left->type, right->type); exit(1); } else if (type==VT_LT) { if ((left->type&VT_INT)) { if ((right->type&VT_INT)) { value = (left->i<right->i); return; } else if ((right->type&VT_FLT)) { value = (left->i<right->f); return; } } else if ((left->type&VT_FLT)) { if ((right->type&VT_INT)) { value = (left->f<right->i); return; } else if ((right->type&VT_FLT)) { value = (left->f<right->f); return; } } else if ((left->type&VT_STR) && (right->type&VT_STR)) { value = strcmp(left->tag.s, right->tag.s)<0 ? 
true : false; return; } fprintf(stderr, "[%s:%d %s] evaluation not supported: %d %d: <\n", __FILE__, __LINE__, __FUNCTION__, left->type, right->type); exit(1); } } else if (type&VT_BCF_OP) { if (type==VT_FILTER) { if (bcf_has_filter(h, v, tag.s)!=1) { value = false; } else { value = true; } } else if (type==VT_INFO) { int32_t *data = NULL; int32_t n=0; if (bcf_get_info_int32(h, v, tag.s, &data, &n)>0) { type |= VT_INT; i = *data; f = (float)i; } else if (bcf_get_info_float(h, v, tag.s, &data, &n)>0) { type |= VT_FLT; f = (float)(*data); } else if (bcf_get_info_string(h, v, tag.s, &data, &n)>0) { type |= VT_STR; s.l=0; for (int32_t i=0; i<n; ++i) { kputc(data[i], &s); } } else if (bcf_get_info_flag(h, v, tag.s, 0, 0)>0) { type |= VT_FLG; i = 1; f = 1; b = true; value = true; s.l=0; } else { i = 0; f = 0; b = false; value = false; s.l=0; } if (n) free(data); } else if (type==(VT_INFO|VT_INT)) { int32_t *data = NULL; int32_t n=0; if (bcf_get_info_int32(h, v, tag.s, &data, &n)>0) { i = *((int*)data); } if (n) free(data); } else if (type==(VT_INFO|VT_FLT)) { int32_t *data = NULL; int32_t n=0; if (bcf_get_info_float(h, v, tag.s, &data, &n)>0) { f = *((float*)data); } if (n) free(data); } else if (type==(VT_INFO|VT_STR)) { int32_t *data = NULL; int32_t n=0; if (bcf_get_info_string(h, v, tag.s, &data, &n)>0) { s.l=0; for (int32_t i=0; i<n; ++i) { kputc(data[i], &s); } } if (n) free(data); } else if (type==(VT_INFO|VT_FLG)) { if (bcf_get_info_flag(h, v, tag.s, 0, 0)>0) { i = 1; f = 1; b = true; value = true; //s.l=0; kputc('1', &s); } else { i = 0; f = 0; b = false; value = false; s.l=0; } if (debug) std::cerr << "\tVT_INFO|VT_FLG " << i << " " << f << " " << b << " " << value << " " << s.s << " \n"; } else if (type==VT_VARIANT_TYPE) { if (debug) std::cerr << "\tVTYPE " << variant->vtype2string(variant->type) << " \n"; i = variant->type; value = i; } else if (type==VT_VARIANT_DLEN) { if (debug) std::cerr << "\tDLEN " << variant->alleles[0].dlen << " \n"; i = 
variant->alleles[0].dlen; value = i; } else if (type==VT_VARIANT_LEN) { if (debug) std::cerr << "\tLEN " << abs(variant->alleles[0].dlen) << " \n"; i = abs(variant->alleles[0].dlen); value = i; } else if (type==VT_N_ALLELE) { if (debug) std::cerr << "\tN_ALLELE " << bcf_get_n_allele(v) << " \n"; i = bcf_get_n_allele(v); } } else if (type&VT_MATH_OP) { if ((type&8207)==VT_ADD) { if ((left->type&VT_INT)) { if ((right->type&VT_INT)) { type |= VT_INT; i = (left->i+right->i); return; } else if ((right->type&VT_FLT)) { type |= VT_FLT; f = (left->i+right->f); return; } } else if ((left->type&VT_FLT)) { if ((right->type&VT_INT)) { type |= VT_FLT; f = (left->f+right->i); return; } else if ((right->type&VT_FLT)) { type |= VT_FLT; f = (left->f+right->f); return; } } fprintf(stderr, "[%s:%d %s] evaluation not supported : +\n", __FILE__, __LINE__, __FUNCTION__); exit(1); } else if ((type&8207)==VT_SUB) { if ((left->type&VT_INT)) { if ((right->type&VT_INT)) { type |= VT_INT; i = (left->i-right->i); return; } else if ((right->type&VT_FLT)) { type |= VT_FLT; f = (left->i-right->f); return; } } else if ((left->type&VT_FLT)) { if ((right->type&VT_INT)) { type |= VT_FLT; f = (left->f-right->i); return; } else if ((right->type&VT_FLT)) { type |= VT_FLT; f = (left->f-right->f); return; } } fprintf(stderr, "[%s:%d %s] evaluation not supported : -\n", __FILE__, __LINE__, __FUNCTION__); exit(1); } else if ((type&8207)==VT_MUL) { if ((left->type&VT_INT)) { if ((right->type&VT_INT)) { type |= VT_INT; i = (left->i*right->i); return; } else if ((right->type&VT_FLT)) { type |= VT_FLT; f = (left->i*right->f); return; } } else if ((left->type&VT_FLT)) { if ((right->type&VT_INT)) { type |= VT_FLT; f = (left->f*right->i); return; } else if ((right->type&VT_FLT)) { type |= VT_FLT; f = (left->f*right->f); return; } } fprintf(stderr, "[%s:%d %s] evaluation not supported : *\n", __FILE__, __LINE__, __FUNCTION__); exit(1); } else if ((type&8207)==VT_DIV) { if (left->type&VT_INT) { if 
(right->type&VT_INT) { type |= VT_FLT; f = ((float)left->i/right->i); return; } else if (right->type&VT_FLT) { type |= VT_FLT; f = (left->i/right->f); return; } } else if (left->type&VT_FLT) { if (right->type&VT_INT) { type |= VT_FLT; f = (left->f/right->i); return; } else if (right->type&VT_FLT) { type |= VT_FLT; f = (left->f/right->f); return; } } fprintf(stderr, "[%s:%d %s] evaluation not supported : /\n", __FILE__, __LINE__, __FUNCTION__); exit(1); } else if (type==VT_BIT_AND) { if ((left->type&VT_INT) && (right->type&VT_INT)) { i = (left->i & right->i); value = i; return; } fprintf(stderr, "[%s:%d %s] evaluation not supported for & : %d %d\n", __FILE__, __LINE__, __FUNCTION__, left->type, right->type); exit(1); } else if (type==VT_BIT_OR) { if ((left->type&VT_INT) && (right->type&VT_INT)) { i = (left->i | right->i); value = i; return; } fprintf(stderr, "[%s:%d %s] evaluation not supported for | : %d %d\n", __FILE__, __LINE__, __FUNCTION__, left->type, right->type); exit(1); } else { fprintf(stderr, "[%s:%d %s] math op not supported : %d\n", __FILE__, __LINE__, __FUNCTION__, (type&15)); exit(1); } } }
/*
 *  parse_line() - extract the ALT allele frequency and P(D|G) from a record
 *  @args:      program state; supplies the header, AF source settings, the
 *              sample index (ismpl) and scratch buffers (itmp, AFs)
 *  @line:      the record to read
 *  @alt_freq:  output, ALT allele frequency
 *  @pdg:       output, three values P(D|G) for the genotypes RR, RA and AA
 *              of sample args->ismpl
 *
 *  The frequency comes from, in order of precedence: the INFO tag
 *  args->af_tag, the file args->af_fname, INFO/AC and INFO/AN (unless
 *  args->estimate_AF is set), or estimate_AF(). A frequency of zero is
 *  replaced by args->dflt_AF, or the site is rejected if that is zero too.
 *
 *  With args->fake_PLs the likelihoods are derived from the genotype using
 *  args->unseen_PL, otherwise from FORMAT/PL through the args->pl2p table,
 *  normalized to sum to one.
 *
 *  Returns 0 on success and a negative value if the site cannot be used.
 */
int parse_line(args_t *args, bcf1_t *line, double *alt_freq, double *pdg)
{
    args->nitmp = 0;

    // Set allele frequency
    int ret;
    if ( args->af_tag )
    {
        // Use an INFO tag provided by the user
        ret = bcf_get_info_float(args->hdr, line, args->af_tag, &args->AFs, &args->mAFs);
        if ( ret==-2 )
            error("Type mismatch for INFO/%s tag at %s:%d\n", args->af_tag, bcf_seqname(args->hdr,line), line->pos+1);

        // Take the first value whenever at least one is present, and reject the
        // site when none is: *alt_freq must never be left unset on success
        if ( ret>0 )
            *alt_freq = args->AFs[0];
        else if ( ret==0 )
            ret = -1;
    }
    else if ( args->af_fname )
    {
        // Read AF from a file
        ret = read_AF(args->files->targets, line, alt_freq);
    }
    else
    {
        // Use GTs or AC/AN: GTs when AC/AN not present or when GTs explicitly requested by --estimate-AF
        ret = -1;
        if ( !args->estimate_AF )
        {
            int AC = -1, AN = 0;
            ret = bcf_get_info_int32(args->hdr, line, "AN", &args->itmp, &args->mitmp);
            if ( ret==1 )
            {
                AN = args->itmp[0];
                ret = bcf_get_info_int32(args->hdr, line, "AC", &args->itmp, &args->mitmp);
                if ( ret>0 )
                    AC = args->itmp[0];
            }
            if ( AN<=0 || AC<0 )
                ret = -1;
            else
                *alt_freq = (double) AC/AN;
        }
        if ( ret==-1 )
            ret = estimate_AF(args, line, alt_freq);    // reads GTs into args->itmp
    }

    if ( ret<0 ) return ret;
    if ( *alt_freq==0.0 )
    {
        if ( args->dflt_AF==0 ) return -1;       // we skip sites with AF=0
        *alt_freq = args->dflt_AF;
    }

    // Set P(D|G)
    if ( args->fake_PLs )
    {
        // args->nitmp is still zero unless the genotypes were already read above
        if ( !args->nitmp )
        {
            args->nitmp = bcf_get_genotypes(args->hdr, line, &args->itmp, &args->mitmp);
            if ( args->nitmp != 2*args->nsmpl ) return -1;     // not diploid?
            args->nitmp /= args->nsmpl;
        }

        int32_t *gt = &args->itmp[args->ismpl*args->nitmp];
        if ( bcf_gt_is_missing(gt[0]) || bcf_gt_is_missing(gt[1]) ) return -1;

        int a = bcf_gt_allele(gt[0]);
        int b = bcf_gt_allele(gt[1]);
        if ( a!=b )
        {
            // heterozygous
            pdg[0] = pdg[2] = args->unseen_PL;
            pdg[1] = 1 - 2*args->unseen_PL;
        }
        else if ( a==0 )
        {
            // homozygous for allele 0
            pdg[0] = 1 - 2*args->unseen_PL;
            pdg[1] = pdg[2] = args->unseen_PL;
        }
        else
        {
            // homozygous for a non-zero allele
            pdg[0] = pdg[1] = args->unseen_PL;
            pdg[2] = 1 - 2*args->unseen_PL;
        }
    }
    else
    {
        args->nitmp = bcf_get_format_int32(args->hdr, line, "PL", &args->itmp, &args->mitmp);
        if ( args->nitmp != args->nsmpl*line->n_allele*(line->n_allele+1)/2. ) return -1;     // not diploid?
        args->nitmp /= args->nsmpl;
        if ( args->nitmp < 3 ) return -1;       // three PLs per sample are read below

        int32_t *pl = &args->itmp[args->ismpl*args->nitmp];

        // negative entries (missing values) must not be used as an index into pl2p
        if ( pl[0]<0 || pl[1]<0 || pl[2]<0 ) return -1;

        // NOTE(review): PL values of 256 and above are mapped to 1.0 - confirm
        // this is intended given how args->pl2p is filled
        pdg[0] = pl[0] < 256 ? args->pl2p[ pl[0] ] : 1.0;
        pdg[1] = pl[1] < 256 ? args->pl2p[ pl[1] ] : 1.0;
        pdg[2] = pl[2] < 256 ? args->pl2p[ pl[2] ] : 1.0;

        double sum = pdg[0] + pdg[1] + pdg[2];
        if ( !sum ) return -1;
        pdg[0] /= sum;
        pdg[1] /= sum;
        pdg[2] /= sum;
    }

    return 0;
}