Ejemplo n.º 1
0
Archivo: variant.cpp Proyecto: atks/vt
/**
 * Copies this variant's VNTR annotation into a BCF record.
 *
 * The record is flagged FUZZY, moved to the fuzzy start position and given
 * the alleles "<fuzzy repeat tract>,<VNTR>". The INFO fields MOTIF, RU,
 * FZ_CONCORDANCE, FZ_RL, FZ_LL, FLANKS, FZ_FLANKS and FZ_RU_COUNTS are then
 * written in that order, followed by LARGE_REPEAT_REGION when the tract is
 * marked as large.
 *
 * @param h  header used to resolve the INFO keys
 * @param v  record that is updated in place
 */
void Variant::update_bcf_with_vntr_info(bcf_hdr_t *h, bcf1_t *v)
{
    bcf_update_info_flag(h, v, "FUZZY", NULL, 1);

    // position and alleles: REF is the fuzzy repeat tract, ALT is symbolic
    bcf_set_pos1(v, vntr.fuzzy_beg1);
    kstring_t alleles = {0,0,0};
    kputs(vntr.fuzzy_repeat_tract.c_str(), &alleles);
    kputs(",<VNTR>", &alleles);
    bcf_update_alleles_str(h, v, alleles.s);

    // motif and repeat unit
    bcf_update_info_string(h, v, "MOTIF", vntr.motif.c_str());
    bcf_update_info_string(h, v, "RU", vntr.ru.c_str());

    // fuzzy tract scores
    bcf_update_info_float(h, v, "FZ_CONCORDANCE", &vntr.fuzzy_score, 1);
    bcf_update_info_float(h, v, "FZ_RL", &vntr.fuzzy_rl, 1);
    bcf_update_info_float(h, v, "FZ_LL", &vntr.fuzzy_ll, 1);

    // flanks sit one base outside the exact and the fuzzy tract
    int32_t exact_flanks[2] = {vntr.exact_beg1-1, vntr.exact_end1+1};
    bcf_update_info_int32(h, v, "FLANKS", exact_flanks, 2);

    int32_t fuzzy_flanks[2] = {vntr.fuzzy_beg1-1, vntr.fuzzy_end1+1};
    bcf_update_info_int32(h, v, "FZ_FLANKS", fuzzy_flanks, 2);

    // repeat unit counts: perfect units first, then all units
    int32_t unit_counts[2] = {vntr.fuzzy_no_perfect_ru, vntr.fuzzy_no_ru};
    bcf_update_info_int32(h, v, "FZ_RU_COUNTS", unit_counts, 2);

    if (vntr.is_large_repeat_tract)
    {
        bcf_update_info_flag(h, v, "LARGE_REPEAT_REGION", NULL, 1);
    }

    // alleles.s is still NULL when nothing was ever appended; free(NULL) is a no-op
    free(alleles.s);
}
Ejemplo n.º 2
0
/*
 * Per-record callback: derives a score from FORMAT/GP and stores it in the
 * float INFO tag "INFO".
 *
 * The record is returned untouched (with a warning printed once per reason)
 * when GP cannot be read or when there are not exactly three GP values per
 * sample. Otherwise the record is returned with the tag added.
 */
bcf1_t *process(bcf1_t *rec)
{
    int nval = 0, i, j;
    int nret = bcf_get_format_values(in_hdr,rec,"GP",(void**)&buf,&nbuf,gp_type);

    // FORMAT/GP is required
    if ( nret<0 )
    {
        if ( nskip_gp==0 )
            fprintf(stderr, "[impute-info.c] Warning: info tag not added to sites without GP tag\n");
        nskip_gp++;
        return rec;
    }

    // number of GP values per sample; only three values per sample are handled
    nret /= rec->n_sample;
    if ( nret != 3 )
    {
        if ( nskip_dip==0 )
            fprintf(stderr, "[impute-info.c] Warning: info tag not added to sites that are not biallelic diploid\n");
        nskip_dip++;
        return rec;
    }

    // Running sums over samples. A sample's values are read until the first
    // missing / vector-end entry; the unread ones stay at zero.
    double esum = 0, e2sum = 0, fsum = 0;
    #define ACCUMULATE_GP(type_t,is_missing,is_vector_end) \
    { \
        type_t *ptr = (type_t*) buf; \
        for (i=0; i<rec->n_sample; i++, ptr+=nret, nval++) \
        { \
            double vals[3] = {0,0,0}; \
            for (j=0; j<nret && !( is_missing || is_vector_end ); j++) \
                vals[j] = ptr[j]; \
            double dose = vals[1] + 2*vals[2]; \
            esum  += dose; \
            e2sum += dose * dose; \
            fsum  += vals[1] + 4*vals[2]; \
        } \
    }
    switch (gp_type)
    {
        case BCF_HT_INT:
            ACCUMULATE_GP(int32_t,ptr[j]==bcf_int32_missing,ptr[j]==bcf_int32_vector_end);
            break;
        case BCF_HT_REAL:
            ACCUMULATE_GP(float,bcf_float_is_missing(ptr[j]),bcf_float_is_vector_end(ptr[j]));
            break;
    }
    #undef ACCUMULATE_GP

    // theta = esum / (2*nval); the score falls back to 1 when theta is not strictly inside (0,1)
    double two_n = 2 * (double)nval;
    double theta = esum / two_n;
    float info;
    if ( theta>0 && theta<1 )
        info = (float)(1 - (fsum - e2sum) / (two_n * theta * (1.0 - theta)));
    else
        info = 1;

    bcf_update_info_float(out_hdr, rec, "INFO", &info, 1);
    nrec++;
    return rec;
}
Ejemplo n.º 3
0
/*
 * Per-record callback. For records with exactly two alleles it reads
 * FORMAT/GL into the file-level buffer `gl`, calls estimate_gt(), and writes
 * FORMAT/DS (from `dosage`), INFO/AF and the genotypes (from `gt`/`ngt`)
 * back to the record. All other records are returned unchanged.
 *
 * NOTE(review): estimate_gt() presumably fills `dosage`, `gt` and `ngt`
 * from `gl` -- confirm against its definition.
 */
bcf1_t *process(bcf1_t *rec)
{
  float af;
  if(rec->n_allele==2) {
    // The return value is the number of values actually stored (negative on
    // error). `ngl` is only the capacity of `gl` and keeps its old value when
    // the tag is missing, so checking it would let stale data through.
    int nret = bcf_get_format_float(in_hdr, rec, "GL", &gl, &ngl);
    assert(nret==(3*n));
    (void)nret; // silence unused-variable warnings when NDEBUG removes the assert
    af =  estimate_gt();
    bcf_update_format_float(out_hdr,rec,"DS",dosage,n);
    bcf_update_info_float(out_hdr,rec,"AF",&af,1);
    bcf_update_genotypes(out_hdr, rec, gt, ngt);
  }
  return rec;
}
Ejemplo n.º 4
0
static int update_bcf1(call_t *call, bcf1_t *rec, const bcf_p1rst_t *pr, double em[10])
{
    int has_I16, is_var;
    float fq, r;
    anno16_t a;
    float tmpf[4], tmpi;

    bcf_get_info_float(call->hdr, rec, "I16", &call->anno16, &call->n16);

    has_I16 = test16(call->anno16, &a) >= 0? 1 : 0;

    // print EM
    if (em[0] >= 0)
    {
        tmpf[0] = 1 - em[0];
        bcf_update_info_float(call->hdr, rec, "AF1", tmpf, 1);
    }
    if (em[4] >= 0 && em[4] <= 0.05)
    {
        tmpf[0] = em[3]; tmpf[1] = em[2]; tmpf[2] = em[1]; tmpf[3] = em[4];
        bcf_update_info_float(call->hdr, rec, "G3", tmpf, 3);
        bcf_update_info_float(call->hdr, rec, "HWE", &tmpf[3], 1);
    }
    if (em[5] >= 0 && em[6] >= 0)
    {
        tmpf[0] = 1 - em[5]; tmpf[1] = 1 - em[6];
        bcf_update_info_float(call->hdr, rec, "AF2", tmpf, 2);
    }
    if (em[7] >= 0)
    {
        tmpf[0] = em[7];
        bcf_update_info_float(call->hdr, rec, "LRT", tmpf, 1);
    }
    if (em[8] >= 0)
    {
        tmpf[0] = em[8];
        bcf_update_info_float(call->hdr, rec, "LRT2", tmpf, 1);
    }

    bcf_p1aux_t *p1 = call->cdat->p1;
    if (p1->cons_llr > 0)
    {
        tmpi = p1->cons_llr;
        bcf_update_info_int32(call->hdr, rec, "CLR", &tmpi, 1);
        // todo: trio calling with -c
        if (p1->cons_gt > 0)
        {
            char tmp[4];
            tmp[0] = p1->cons_gt&0xff; tmp[1] = p1->cons_gt>>8&0xff; tmp[2] = p1->cons_gt>>16&0xff; tmp[3] = 0;
            bcf_update_info_string(call->hdr, rec, "UGT", tmp);
            tmp[0] = p1->cons_gt>>32&0xff; tmp[1] = p1->cons_gt>>40&0xff; tmp[2] = p1->cons_gt>>48&0xff;
            bcf_update_info_string(call->hdr, rec, "CGT", tmp);
        }
Ejemplo n.º 5
0
Archivo: query.c Proyecto: CoREse/gqt
//{{{ void print_query_result_offset(uint32_t *mask,
/*
 * Prints the variants selected by `mask` to standard output
 * (hts_open("-", "w")), restricted to the samples listed in U_R and
 * annotated with one GQT_<k> INFO field per query.
 *
 * mask, mask_len   bit set, most significant bit first within each word;
 *                  bit j of word i selects line i*32+j of the source file
 * q                per-query settings; only variant_op is read here
 * counts           counts[k][line] is the raw count for query k on that line
 * id_lens          id_lens[k] is the divisor for the p_pct / p_maf values
 * U_R, U_R_len     indexes into the source header's sample array
 * id_query_list,
 * gt_query_list    query texts, copied into the INFO descriptions
 * num_qs           number of queries
 * num_fields       upper bound on the number of mask bits that are examined
 * off_file_name    offset index used to seek to a given line
 * source_file      BCF/VCF file the lines are read from
 * full_cmd         command line recorded in the output header
 *
 * vids is not used. Any failure terminates the process through err()/errx().
 *
 * NOTE(review): bcf_f (header, file handle, line buffer) is not released
 * here; no teardown routine for struct bcf_file is visible from this block.
 */
void print_query_result_offset(uint32_t *mask,
                               uint32_t mask_len,
                               uint32_t *vids,
                               struct gqt_query *q,
                               uint32_t **counts,
                               uint32_t *id_lens,
                               uint32_t *U_R,
                               uint32_t U_R_len,
                               char **id_query_list,
                               char **gt_query_list,
                               uint32_t num_qs,
                               uint32_t num_fields,
                               char *off_file_name,
                               char *source_file,
                               char *full_cmd)
{
    struct off_file *off_f = open_off_file(off_file_name);
    struct bcf_file bcf_f = init_bcf_file(source_file);

    char *sample_names = NULL;

    uint32_t i,j,k,line_idx,bytes, bit_i = 0;
    int r;

    /* Build the comma-separated sample list. Each step formats into a fresh
     * buffer and releases the previous one, so nothing is leaked. */
    for (i = 0; i < U_R_len; ++i) {
        char *joined = NULL;
        if (i == 0)
            r = asprintf(&joined,
                         "%s",
                         bcf_f.hdr->samples[U_R[i]]);
        else
            r = asprintf(&joined,
                         "%s,%s",
                         sample_names,
                         bcf_f.hdr->samples[U_R[i]]);
        if (r == -1)
            err(EX_OSERR, "asprintf error");
        free(sample_names);
        sample_names = joined;
    }

    if (bcf_hdr_set_samples(bcf_f.hdr, sample_names, 0) != 0)
        errx(EX_DATAERR, "Error setting samples: %s\n", source_file);
    free(sample_names);
    sample_names = NULL;

    char *info_s = NULL;

    /* One ##INFO header line per query; only type and wording differ. */
    for (i = 0; i < num_qs; i++) {
        const char *type;
        const char *what;

        if ( q[i].variant_op == p_count ) {
            type = "Integer";
            what = "count";
        } else if ( q[i].variant_op == p_pct ) {
            type = "Float";
            what = "percent";
        } else if ( q[i].variant_op == p_maf ) {
            type = "Float";
            what = "maf";
        } else {
            continue;
        }

        r = asprintf(&info_s, "##INFO=<ID=GQT_%u,Number=1,Type=%s,"
                     "Description=\"GQT %s result from "
                     "phenotype:'%s' genotype:'%s'\">",
                     i, type, what, id_query_list[i], gt_query_list[i]);
        if (r == -1) err(EX_OSERR, "asprintf error");

        if (bcf_hdr_append(bcf_f.hdr, info_s) != 0)
            errx(EX_DATAERR, "Error updating header: %s\n", source_file);
        free(info_s);
    }

    r = asprintf(&info_s, "##%s_queryVersion=%s", PROGRAM_NAME, VERSION);
    if (r == -1) err(EX_OSERR, "asprintf error");

    if (bcf_hdr_append(bcf_f.hdr, info_s) != 0)
        errx(EX_DATAERR, "Error updating header: %s\n", source_file);
    free(info_s);

    r = asprintf(&info_s, "##%s_queryCommand=%s", PROGRAM_NAME, full_cmd);
    if (r == -1) err(EX_OSERR, "asprintf error");

    if (bcf_hdr_append(bcf_f.hdr, info_s) != 0)
        errx(EX_DATAERR, "Error updating header: %s\n", source_file);
    free(info_s);
    info_s = NULL;

    htsFile *out_f = hts_open("-","w");
    if ( !out_f )
        err(EX_DATAERR, "Could not open output file");

    if (bcf_hdr_write(out_f, bcf_f.hdr) != 0)
        errx(EX_IOERR, "Error writing header to output");

    bcf_f.line = bcf_init1();

    for (i=0; i < mask_len; ++i) {
        bytes = mask[i];
        if (bytes == 0)
            continue; /* skip a bunch of ops if you can */
        for (j=0; j < 32; j++) {
            /* unsigned constant: shifting a signed 1 into bit 31 is undefined */
            if (bytes & (1u << (31 - j))) {
                line_idx = i*32+j;

                r = goto_bcf_line(&bcf_f, off_f, line_idx);

                if (r == -1)
                    err(EX_NOINPUT,
                        "Error seeking file '%s'", bcf_f.file_name);

                r = get_bcf_line(&bcf_f);
                if (r == -1)
                    err(EX_NOINPUT,
                        "Error reading file '%s'", bcf_f.file_name);

                for (k=0; k < num_qs; k++) {
                    /* "GQT_" plus at most 10 digits fits easily; a stack
                     * buffer avoids one heap allocation per query per line */
                    char tag[32];
                    snprintf(tag, sizeof tag, "GQT_%u", k);

                    if ( q[k].variant_op == p_count ) {
                        int32_t v = counts[k][line_idx];
                        if (bcf_update_info_int32(bcf_f.hdr,
                                                  bcf_f.line,
                                                  tag,
                                                  &v,
                                                  1) != 0)
                            errx(EX_DATAERR,
                                 "Error adding to info field: %s\n",
                                 bcf_f.file_name);
                    } else if (q[k].variant_op == p_pct) {
                        float v = ((float)counts[k][line_idx])/
                                    ((float) id_lens[k]);
                        if (bcf_update_info_float(bcf_f.hdr,
                                                  bcf_f.line,
                                                  tag,
                                                  &v,
                                                  1) != 0)
                            errx(EX_DATAERR,
                                 "Error adding to info field: %s\n",
                                 bcf_f.file_name);

                    } else if (q[k].variant_op == p_maf) {
                        float v = ((float)counts[k][line_idx])/
                                    (((float) id_lens[k])*2.0);
                        if (bcf_update_info_float(bcf_f.hdr,
                                                  bcf_f.line,
                                                  tag,
                                                  &v,
                                                  1) != 0)
                            errx(EX_DATAERR,
                                 "Error adding to info field: %s\n",
                                 bcf_f.file_name);
                    }
                }

                if (bcf_write(out_f, bcf_f.hdr, bcf_f.line) != 0)
                    errx(EX_IOERR, "Error writing record to output");

            }
            /* NOTE(review): bit_i is not advanced for the all-zero words
             * skipped above, so it equals the bit position only when no word
             * was skipped -- confirm the num_fields bound is as intended. */
            bit_i++;
            if (bit_i == num_fields)
                break;
        }

        if (bit_i == num_fields)
            break;
    }
    hts_close(out_f);
    destroy_off_file(off_f);
}
Ejemplo n.º 6
0
/*
 * Writes one BCF record for every kept site of `pars`.
 *
 * On the first call the output file, header and record buffer are created
 * and the header is printed. For each site with keepSites[s] != 0 the record
 * gets: contig and position, the alleles "major,minor", QUAL 29, FILTER PASS,
 * INFO NS (from keepSites), INFO DP (sum of all counts, when counts are
 * present), INFO AF (when frequencies are present), and FORMAT GT, DP, GL
 * and PL. Does nothing when doBcf is 0.
 *
 * Reads pars->extras[5] (likelihoods), extras[6] (frequencies) and
 * extras[10] (genotype calls). Terminates the process with a non-zero
 * status if a record cannot be written.
 *
 * NOTE(review): the GT buffer is sized by the header's sample count but
 * filled for pars->nInd individuals; this assumes the two are equal.
 */
void abcWriteBcf::print(funkyPars *pars){
  if(doBcf==0)
    return;
  kstring_t buf;
  if(fp==NULL){
    buf.s=NULL;buf.l=buf.m=0;
    fp=aio::openFileHts(outfiles,".bcf");
    hdr = bcf_hdr_init("w");
    rec    = bcf_init1();
    print_bcf_header(fp,hdr,args,buf,header);
  }
  lh3struct *lh3 = (lh3struct*) pars->extras[5];
  freqStruct *freq = (freqStruct *) pars->extras[6];
  genoCalls *geno = (genoCalls *) pars->extras[10];
  
  for(int s=0;s<pars->numSites;s++){
    if(pars->keepSites[s]==0)
      continue;

    const int nsmpl = bcf_hdr_nsamples(hdr);

    rec->rid = bcf_hdr_name2id(hdr,header->target_name[pars->refId]);
    rec->pos = pars->posi[s];//<- maybe one index?
    char majmin[4]={intToRef[pars->major[s]],',',intToRef[pars->minor[s]],'\0'};
    bcf_update_alleles_str(hdr, rec, majmin);
    rec->qual = 29;
    // .. FILTER
    int32_t tmpi = bcf_hdr_id2int(hdr, BCF_DT_ID, "PASS");
    bcf_update_filter(hdr, rec, &tmpi, 1);
    // .. INFO
    
    tmpi = pars->keepSites[s];
    bcf_update_info_int32(hdr, rec, "NS", &tmpi, 1);

    if(pars->counts){
      // counts[s] holds four counts per individual
      int depth = 0;
      for(int i=0; i<4*pars->nInd; i++)
	depth += pars->counts[s][i];
      tmpi = depth;
      bcf_update_info_int32(hdr, rec, "DP", &tmpi, 1);

    }
    if(freq){
      float tmpf = freq->freq_EM[s];
      bcf_update_info_float(hdr, rec, "AF", &tmpf, 1);
    }
    
    // .. FORMAT
    assert(geno);
    if(geno){
      // called genotype 0 -> 0/0, 1 -> 0/1, anything else -> 1/1
      int32_t *gts = (int32_t*)malloc(nsmpl*2*sizeof(int32_t));
      for(int i=0; i<pars->nInd;i++){
	if(geno->dat[s][i]==0){
	  gts[2*i+0] = bcf_gt_unphased(0);
	  gts[2*i+1] = bcf_gt_unphased(0);
	}else if(geno->dat[s][i]==1){
	  gts[2*i+0] = bcf_gt_unphased(0);
	  gts[2*i+1] = bcf_gt_unphased(1);
	}  else{
	  gts[2*i+0] = bcf_gt_unphased(1);
	  gts[2*i+1] = bcf_gt_unphased(1);
	}
      }
      bcf_update_genotypes(hdr, rec, gts, nsmpl*2); 
      free(gts);
    }
    if(pars->counts){
      // Per-sample depth: each sample sums its OWN four counts. Indexing
      // ary[0..3] for every sample would repeat the first individual's depth.
      int32_t *dp = (int32_t*)malloc(sizeof(int32_t)*nsmpl);
      suint *ary=pars->counts[s];
      for(int i=0;i<nsmpl;i++){
	if(i<pars->nInd)
	  dp[i] = ary[4*i+0]+ary[4*i+1]+ary[4*i+2]+ary[4*i+3];
	else
	  dp[i] = bcf_int32_missing; // no counts stored for this sample
      }
      bcf_update_format_int32(hdr, rec, "DP", dp, nsmpl);
      free(dp);
    }
    assert(lh3);
    if(lh3){
      float *gl   =   (float*)malloc(3*nsmpl*sizeof(float  ));
      int32_t *pl = (int32_t*)malloc(3*nsmpl*sizeof(int32_t));
      double *ary = lh3->lh3[s];
      for(int i=0;i<nsmpl;i++)
	for(int j=0;j<3;j++){
	  // natural-log likelihood -> log10 scale
	  gl[i*3+j] = ary[i*3+j]/M_LN10;
	  // PL = -10*log10(L). Scale BEFORE truncating: casting first
	  // rounded every PL to a multiple of 10. Dividing by ln(10)
	  // also avoids exp() underflowing to 0 for very small likelihoods.
	  pl[i*3+j] = (int32_t)(-10.0*ary[i*3+j]/M_LN10);
	}
      bcf_update_format_float(hdr, rec, "GL", gl,3*nsmpl );
      bcf_update_format_int32(hdr, rec, "PL", pl,3*nsmpl );
      free(gl);
      free(pl);
    }

    if ( bcf_write1(fp, hdr, rec)!=0 ){
      fprintf(stderr,"Failed to write BCF record\n");
      exit(1); // a failed write is an error, so do not report success
    }
    bcf_clear1(rec);
  }
}
Ejemplo n.º 7
0
/*
 * Per-record callback: tallies alleles from FORMAT/GT and (re)writes the
 * INFO tags selected in args.tags -- NS, AN, AF, AC, AC_Het, AC_Hom and
 * AC_Hemi.
 *
 * The record is returned unchanged when it carries no GT field. Any
 * unrecognised GT storage type, out-of-range allele index or failed INFO
 * update is reported through error().
 */
bcf1_t *process(bcf1_t *rec)
{
    int i, ns = 0;

    // locate the GT field among the record's FORMAT fields
    bcf_unpack(rec, BCF_UN_FMT);
    bcf_fmt_t *fmt_gt = NULL;
    for (i=0; i<rec->n_fmt; i++)
        if ( rec->d.fmt[i].id==args.gt_id ) { fmt_gt = &rec->d.fmt[i]; break; }
    if ( !fmt_gt ) return rec;    // no GT tag

    // make the scratch arrays hold one entry per allele; arr and counts start at zero
    hts_expand(int32_t,rec->n_allele,args.marr,args.arr);
    hts_expand(float,rec->n_allele,args.mfarr,args.farr);
    hts_expand(counts_t,rec->n_allele,args.mcounts,args.counts);
    memset(args.arr,0,sizeof(*args.arr)*rec->n_allele);
    memset(args.counts,0,sizeof(*args.counts)*rec->n_allele);

    // Per sample: collect the alleles seen into the bit mask `als`, stopping at
    // the first vector-end or missing allele. Samples with no allele read are
    // skipped; every other sample increments ns. Then, for each allele in the
    // mask: nhet++ when more than one distinct allele was seen, nhom += 2 when
    // a single distinct allele was seen more than once, nhemi++ when exactly
    // one allele was read. The mask is an int, hence the allele-count limit
    // noted inside the macro.
    #define BRANCH_INT(type_t,vector_end) { \
        for (i=0; i<rec->n_sample; i++) \
        { \
            type_t *p = (type_t*) (fmt_gt->p + i*fmt_gt->size); \
            int ial, als = 0; \
            for (ial=0; ial<fmt_gt->n; ial++) \
            { \
                if ( p[ial]==vector_end ) break; /* smaller ploidy */ \
                if ( bcf_gt_is_missing(p[ial]) ) break; /* missing allele */ \
                int idx = bcf_gt_allele(p[ial]); \
                \
                if ( idx >= rec->n_allele ) \
                    error("Incorrect allele (\"%d\") in %s at %s:%d\n",idx,args.in_hdr->samples[i],bcf_seqname(args.in_hdr,rec),rec->pos+1); \
                als |= (1<<idx);  /* this breaks with too many alleles */ \
            } \
            if ( ial==0 ) continue; /* missing alleles */ \
            ns++; \
            int is_hom  = als && !(als & (als-1)); /* only one bit is set */ \
            int is_hemi = ial==1; \
            for (ial=0; als; ial++) \
            { \
                if ( als&1 ) \
                { \
                    if ( !is_hom ) \
                        args.counts[ial].nhet++; \
                    else if ( !is_hemi ) \
                        args.counts[ial].nhom += 2; \
                    else \
                        args.counts[ial].nhemi++; \
                } \
                als >>= 1; \
            } \
        } \
    }
    // run the tally with the integer width GT is stored in
    switch (fmt_gt->type) {
        case BCF_BT_INT8:  BRANCH_INT(int8_t,  bcf_int8_vector_end); break;
        case BCF_BT_INT16: BRANCH_INT(int16_t, bcf_int16_vector_end); break;
        case BCF_BT_INT32: BRANCH_INT(int32_t, bcf_int32_vector_end); break;
        default: error("The GT type is not recognised: %d at %s:%d\n",fmt_gt->type, bcf_seqname(args.in_hdr,rec),rec->pos+1); break;
    }
    #undef BRANCH_INT
    // NS: number of samples for which at least one allele was read
    if ( args.tags&SET_NS )
    {
        if ( bcf_update_info_int32(args.out_hdr,rec,"NS",&ns,1)!=0 )
            error("Error occurred while updating NS at %s:%d\n", bcf_seqname(args.in_hdr,rec),rec->pos+1);
    }
    // AN: sum of nhet + nhom + nhemi over all alleles, REF included
    if ( args.tags&SET_AN )
    {
        args.arr[0] = 0;
        for (i=0; i<rec->n_allele; i++)
            args.arr[0] += args.counts[i].nhet + args.counts[i].nhom + args.counts[i].nhemi;
        if ( bcf_update_info_int32(args.out_hdr,rec,"AN",args.arr,1)!=0 )
            error("Error occurred while updating AN at %s:%d\n", bcf_seqname(args.in_hdr,rec),rec->pos+1);
    }
    // AF: per-ALT count divided by the total in arr[0]; written only when arr[0] is non-zero
    if ( args.tags&SET_AF )
    {
        int n = rec->n_allele-1;
        if ( n>0 )
        {
            args.arr[0] = 0;
            for (i=0; i<rec->n_allele; i++)
                args.arr[0] += args.counts[i].nhet + args.counts[i].nhom + args.counts[i].nhemi;
            for (i=1; i<rec->n_allele; i++)
                args.farr[i] = (args.counts[i].nhet + args.counts[i].nhom + args.counts[i].nhemi)*1.0/args.arr[0];
        }
        // when n<=0 the total is not recomputed here: arr[0] still holds
        // whatever the code above left in it
        if ( args.arr[0] )
        {
            if ( bcf_update_info_float(args.out_hdr,rec,"AF",args.farr+1,n)!=0 )
                error("Error occurred while updating AF at %s:%d\n", bcf_seqname(args.in_hdr,rec),rec->pos+1);
        }
    }
    // AC: per-ALT total (het + hom + hemi); slot 0 (REF) is skipped via arr+1
    if ( args.tags&SET_AC )
    {
        int n = rec->n_allele-1;
        if ( n>0 )
        {
            memset(args.arr,0,sizeof(*args.arr)*rec->n_allele);
            for (i=1; i<rec->n_allele; i++)
                args.arr[i] = args.counts[i].nhet + args.counts[i].nhom + args.counts[i].nhemi;
        }
        if ( bcf_update_info_int32(args.out_hdr,rec,"AC",args.arr+1,n)!=0 )
            error("Error occurred while updating AC at %s:%d\n", bcf_seqname(args.in_hdr,rec),rec->pos+1);
    }
    // AC_Het: per-ALT count taken from nhet only
    if ( args.tags&SET_AC_Het )
    {
        int n = rec->n_allele-1;
        if ( n>0 )
        {
            memset(args.arr,0,sizeof(*args.arr)*rec->n_allele);
            for (i=1; i<rec->n_allele; i++)
                args.arr[i] += args.counts[i].nhet;
        }
        if ( bcf_update_info_int32(args.out_hdr,rec,"AC_Het",args.arr+1,n)!=0 )
            error("Error occurred while updating AC_Het at %s:%d\n", bcf_seqname(args.in_hdr,rec),rec->pos+1);
    }
    // AC_Hom: per-ALT count taken from nhom only
    if ( args.tags&SET_AC_Hom )
    {
        int n = rec->n_allele-1;
        if ( n>0 )
        {
            memset(args.arr,0,sizeof(*args.arr)*rec->n_allele);
            for (i=1; i<rec->n_allele; i++)
                args.arr[i] += args.counts[i].nhom;
        }
        if ( bcf_update_info_int32(args.out_hdr,rec,"AC_Hom",args.arr+1,n)!=0 )
            error("Error occurred while updating AC_Hom at %s:%d\n", bcf_seqname(args.in_hdr,rec),rec->pos+1);
    }
    // AC_Hemi: per-ALT count taken from nhemi only
    if ( args.tags&SET_AC_Hemi )
    {
        int n = rec->n_allele-1;
        if ( n>0 )
        {
            memset(args.arr,0,sizeof(*args.arr)*rec->n_allele);
            for (i=1; i<rec->n_allele; i++)
                args.arr[i] += args.counts[i].nhemi;
        }
        if ( bcf_update_info_int32(args.out_hdr,rec,"AC_Hemi",args.arr+1,n)!=0 )
            error("Error occurred while updating AC_Hemi at %s:%d\n", bcf_seqname(args.in_hdr,rec),rec->pos+1);
    }
    return rec;
}