/**
 * Extracts the reference sequence region for motif discovery.
 *
 * The input is a VCF record that contains an indel.
 *
 * If the indel has multiple alleles, every alternate allele is examined:
 * each REF/ALT pair is trimmed, right aligned and then left aligned, and the
 * union of all the resulting [beg1,end1] intervals becomes the exact region.
 *
 * On return the following fields of variant.vntr are updated:
 *   exact_repeat_tract - reference sequence of the merged region
 *                        (empty if the sequence could not be fetched)
 *   rid                - reference sequence id of the record
 *   exact_rbeg1        - 1-based start of the merged region
 *   exact_rend1        - 1-based end of the merged region
 *
 * @h       - VCF header
 * @v       - VCF record
 * @variant - variant whose vntr member receives the result
 *
 * todo: it might be a good idea to combine this step with motif detection
 *       since there seems to be a need to have an iterative process here
 *       to ensure a good candidate motif is chosen.
 */
void CandidateRegionExtractor::extract_regions_by_exact_alignment(bcf_hdr_t* h, bcf1_t* v, Variant& variant)
{
    if (debug)
    {
        std::cerr << "********************************************\n";
        std::cerr << "EXTRACTIING REGION BY EXACT LEFT AND RIGHT ALIGNMENT\n\n";
    }

    VNTR& vntr = variant.vntr;
    const char* chrom = bcf_get_chrom(h, v);

    int32_t min_beg1 = bcf_get_pos1(v);
    int32_t max_end1 = min_beg1;

    if (debug)
    {
       bcf_print_liten(h, v);
    }

    //merge candidate search region
    const int32_t n_allele = bcf_get_n_allele(v);
    for (int32_t i=1; i<n_allele; ++i)
    {
        std::string ref(bcf_get_alt(v, 0));
        std::string alt(bcf_get_alt(v, i));
        int32_t pos1 = bcf_get_pos1(v);

        //this prevents introduction of flanks that do not harbour the repeat unit
        trim(pos1, ref, alt);

        int32_t end1 = pos1 + static_cast<int32_t>(ref.size()) - 1;
        right_align(chrom, end1, ref, alt);

        int32_t beg1 = end1 - static_cast<int32_t>(ref.size()) + 1;
        left_align(chrom, beg1, ref, alt);

        min_beg1 = beg1<min_beg1 ? beg1 : min_beg1;
        max_end1 = end1>max_end1 ? end1 : max_end1;

        //the intermediate sequence is only ever printed, so only fetch it when debugging
        if (debug)
        {
            int32_t seq_len = 0;
            char* seq = faidx_fetch_seq(fai, chrom, min_beg1-1, max_end1-1, &seq_len);

            std::cerr << "EXACT REGION " << min_beg1 << "-" << max_end1 << " (" << max_end1-min_beg1+1 <<") from " << pos1 << ":" << ref << ":" << alt << "\n";
            //a failed fetch yields NULL, which must not be streamed
            std::cerr << "             " << (seq ? seq : "") << "\n";

            //free(NULL) is a no-op; a zero length sequence is still heap allocated
            free(seq);
        }
    }

    int32_t seq_len = 0;
    char* seq = faidx_fetch_seq(fai, chrom, min_beg1-1, max_end1-1, &seq_len);

    if (debug)
    {
        std::cerr << "FINAL EXACT REGION " << min_beg1 << "-" << max_end1 << " (" << max_end1-min_beg1+1 <<") " << "\n";
        std::cerr << "                   " << (seq ? seq : "") << "\n";
    }

    if (seq)
    {
        vntr.exact_repeat_tract = seq;
    }
    else
    {
        //constructing a std::string from NULL is undefined, so report and store an empty tract
        std::cerr << "[W] failed to fetch reference sequence for " << chrom << ":" << min_beg1 << "-" << max_end1 << "\n";
        vntr.exact_repeat_tract.clear();
    }

    vntr.rid = bcf_get_rid(v);
    vntr.exact_rbeg1 = min_beg1;
    vntr.exact_rend1 = max_end1;

    free(seq);
}
Exemple #2
0
/**
 * Constructor.
 *
 * Classifies the record and then fills in the interval [beg1,end1] it covers.
 * SNPs span a single base. Every other recognised type takes its end
 * from the END INFO field and falls back to bcf_get_end1() when that field
 * yields 0. VNTR records additionally load their INFO annotations and are
 * registered in vs and vntr_vs.
 *
 * An unrecognised type is reported on stderr and terminates the program.
 *
 * @h - VCF header
 * @v - VCF record
 */
Variant::Variant(bcf_hdr_t* h, bcf1_t* v)
{
    this->h = h;
    this->v = v;

    type = classify(h, v);

    chrom = bcf_get_chrom(h, v);
    rid = bcf_get_rid(v);
    pos1 = bcf_get_pos1(v);

    no_overlapping_snps = 0;
    no_overlapping_indels = 0;
    no_overlapping_vntrs = 0;

    is_new_multiallelic =  false;

    //work out which family the record belongs to; the tests are chained so
    //that each one only applies when the earlier ones did not match
    const bool simple_or_complex = (type==VT_INDEL) ||
                                   (type & (VT_SNP|VT_MNP|VT_INDEL|VT_CLUMPED));
    const bool pure_vntr = !simple_or_complex && type==VT_VNTR;
    const bool pure_sv = !simple_or_complex && !pure_vntr && type==VT_SV;

    //attempts to update relevant information on variants
    if (type==VT_SNP)
    {
        beg1 = bcf_get_pos1(v);
        end1 = bcf_get_pos1(v);
    }
    else if (simple_or_complex || pure_vntr || pure_sv)
    {
        beg1 = bcf_get_pos1(v);

        //annotate ends, preferring the END INFO field when it is set
        end1 = bcf_get_info_int(h, v, "END", 0);
        if (!end1)
        {
            end1 = bcf_get_end1(v);
        }

        if (pure_vntr)
        {
            update_vntr_from_info_fields(h, v);

            vs.push_back(v);
            vntr_vs.push_back(v);
        }
    }
    else
    {
        std::cerr << "unexpected type in variant construction\n";
        print();
        exit(1);
    }
}
Exemple #3
0
/**
 * Updates VNTR related information from INFO fields.
 *
 * Reads three parallel groups of annotations from the record held in this
 * object (members h and v) into the vntr member:
 *   - unprefixed tags (MOTIF, RU, BASIS, REPEAT_TRACT, COMP, ...)
 *   - EX_ prefixed tags for the exact annotation
 *   - FZ_ prefixed tags for the fuzzy annotation
 *
 * The basis is derived from the motif when the BASIS tag is empty.
 *
 * NOTE(review): the vector reads below index the result directly; this
 * assumes bcf_get_info_int_vec(h, v, tag, n, 0) always returns at least
 * n entries - confirm against the helper.
 */
void Variant::update_vntr_from_info_fields()
{
    //the reference sequence id belongs in vntr.rid; assigning it to the
    //motif string would narrow the integer to a single character
    vntr.rid = bcf_get_rid(v);

    //tags consumed per group:
    //MOTIF, RU, BASIS, REPEAT_TRACT, COMP, ENTROPY, ENTROPY2, KL_DIVERGENCE,
    //KL_DIVERGENCE2, RL, LL, RU_COUNTS, SCORE, TRF_SCORE

    vntr.motif = bcf_get_info_str(h, v, "MOTIF");
    vntr.ru = bcf_get_info_str(h, v, "RU");
    vntr.basis = bcf_get_info_str(h, v, "BASIS");
    if (vntr.basis=="") vntr.basis = VNTR::get_basis(vntr.motif);
    vntr.mlen = static_cast<int32_t>(vntr.motif.size());
    vntr.blen = static_cast<int32_t>(vntr.basis.size());
    std::vector<int32_t> i_vec = bcf_get_info_int_vec(h, v, "REPEAT_TRACT", 2, 0);
    vntr.beg1 = i_vec[0];
    vntr.end1 = i_vec[1];
    i_vec = bcf_get_info_int_vec(h, v, "COMP", 4, 0);
    vntr.comp[0] = i_vec[0];
    vntr.comp[1] = i_vec[1];
    vntr.comp[2] = i_vec[2];
    vntr.comp[3] = i_vec[3];
    vntr.entropy = bcf_get_info_flt(h, v, "ENTROPY");
    vntr.entropy2 = bcf_get_info_flt(h, v, "ENTROPY2");
    vntr.kl_divergence = bcf_get_info_flt(h, v, "KL_DIVERGENCE");
    vntr.kl_divergence2 = bcf_get_info_flt(h, v, "KL_DIVERGENCE2");
    vntr.rl = bcf_get_info_int(h, v, "RL");
    vntr.ll = bcf_get_info_int(h, v, "LL");
    i_vec = bcf_get_info_int_vec(h, v, "RU_COUNTS", 2, 0);
    vntr.no_perfect_ru = i_vec[0];
    vntr.no_ru = i_vec[1];
    vntr.score = bcf_get_info_flt(h, v, "SCORE");
    vntr.trf_score = bcf_get_info_int(h, v, "TRF_SCORE");

    //exact annotation
    vntr.exact_motif = bcf_get_info_str(h, v, "EX_MOTIF");
    vntr.exact_ru = bcf_get_info_str(h, v, "EX_RU");
    vntr.exact_basis = bcf_get_info_str(h, v, "EX_BASIS");
    vntr.exact_mlen = static_cast<int32_t>(vntr.exact_motif.size());
    vntr.exact_blen = static_cast<int32_t>(vntr.exact_basis.size());
    i_vec = bcf_get_info_int_vec(h, v, "EX_REPEAT_TRACT", 2, 0);
    vntr.exact_beg1 = i_vec[0];
    vntr.exact_end1 = i_vec[1];
    i_vec = bcf_get_info_int_vec(h, v, "EX_COMP", 4, 0);
    vntr.exact_comp[0] = i_vec[0];
    vntr.exact_comp[1] = i_vec[1];
    vntr.exact_comp[2] = i_vec[2];
    vntr.exact_comp[3] = i_vec[3];
    vntr.exact_entropy = bcf_get_info_flt(h, v, "EX_ENTROPY");
    vntr.exact_entropy2 = bcf_get_info_flt(h, v, "EX_ENTROPY2");
    vntr.exact_kl_divergence = bcf_get_info_flt(h, v, "EX_KL_DIVERGENCE");
    vntr.exact_kl_divergence2 = bcf_get_info_flt(h, v, "EX_KL_DIVERGENCE2");
    vntr.exact_rl = bcf_get_info_int(h, v, "EX_RL");
    vntr.exact_ll = bcf_get_info_int(h, v, "EX_LL");
    i_vec = bcf_get_info_int_vec(h, v, "EX_RU_COUNTS", 2, 0);
    vntr.exact_no_perfect_ru = i_vec[0];
    vntr.exact_no_ru = i_vec[1];
    vntr.exact_score = bcf_get_info_flt(h, v, "EX_SCORE");
    vntr.exact_trf_score = bcf_get_info_int(h, v, "EX_TRF_SCORE");

    //fuzzy annotation
    vntr.fuzzy_motif = bcf_get_info_str(h, v, "FZ_MOTIF");
    vntr.fuzzy_ru = bcf_get_info_str(h, v, "FZ_RU");
    vntr.fuzzy_basis = bcf_get_info_str(h, v, "FZ_BASIS");
    vntr.fuzzy_mlen = static_cast<int32_t>(vntr.fuzzy_motif.size());
    vntr.fuzzy_blen = static_cast<int32_t>(vntr.fuzzy_basis.size());
    i_vec = bcf_get_info_int_vec(h, v, "FZ_REPEAT_TRACT", 2, 0);
    vntr.fuzzy_beg1 = i_vec[0];
    vntr.fuzzy_end1 = i_vec[1];
    i_vec = bcf_get_info_int_vec(h, v, "FZ_COMP", 4, 0);
    vntr.fuzzy_comp[0] = i_vec[0];
    vntr.fuzzy_comp[1] = i_vec[1];
    vntr.fuzzy_comp[2] = i_vec[2];
    vntr.fuzzy_comp[3] = i_vec[3];
    vntr.fuzzy_entropy = bcf_get_info_flt(h, v, "FZ_ENTROPY");
    vntr.fuzzy_entropy2 = bcf_get_info_flt(h, v, "FZ_ENTROPY2");
    vntr.fuzzy_kl_divergence = bcf_get_info_flt(h, v, "FZ_KL_DIVERGENCE");
    vntr.fuzzy_kl_divergence2 = bcf_get_info_flt(h, v, "FZ_KL_DIVERGENCE2");
    vntr.fuzzy_rl = bcf_get_info_int(h, v, "FZ_RL");
    vntr.fuzzy_ll = bcf_get_info_int(h, v, "FZ_LL");
    i_vec = bcf_get_info_int_vec(h, v, "FZ_RU_COUNTS", 2, 0);
    vntr.fuzzy_no_perfect_ru = i_vec[0];
    vntr.fuzzy_no_ru = i_vec[1];
    vntr.fuzzy_score = bcf_get_info_flt(h, v, "FZ_SCORE");
    vntr.fuzzy_trf_score = bcf_get_info_int(h, v, "FZ_TRF_SCORE");
}
Exemple #4
0
/**
 * Returns true if substituting base a with base b is a transition
 * (A<->G or C<->T).
 */
static bool vt_is_transition(char a, char b)
{
    return (a=='G' && b=='A') ||
           (a=='A' && b=='G') ||
           (a=='C' && b=='T') ||
           (a=='T' && b=='C');
}

/**
 * Returns true if a symbolic allele denotes a tandem repeat, i.e. it is one of
 *   <VNTR>, <STR>, <VN\d+> or <ST[\d.]+>
 * Anything else (including alleles shorter than 5 characters or not
 * enclosed in angle brackets) is not treated as a VNTR.
 */
static bool vt_is_vntr_symbolic_allele(const char* allele)
{
    const size_t len = strlen(allele);

    if (len<5 || allele[0]!='<' || allele[len-1]!='>')
    {
        return false;
    }

    if (strcmp(allele, "<VNTR>")==0 || strcmp(allele, "<STR>")==0)
    {
        return true;
    }

    const bool vn = allele[1]=='V' && allele[2]=='N';
    const bool st = allele[1]=='S' && allele[2]=='T';

    if (!vn && !st)
    {
        return false;
    }

    //the payload must be numeric; ST alleles may also carry a decimal point
    for (size_t j=3; j<len-1; ++j)
    {
        const bool digit = allele[j]>='0' && allele[j]<='9';
        const bool dot = st && allele[j]=='.';

        if (!digit && !dot)
        {
            return false;
        }
    }

    return true;
}

/**
 * Classifies variants.
 *
 * Resets this object, records the header and record, and inspects every
 * alternate allele in turn:
 *   - symbolic alleles become VT_VNTR (see vt_is_vntr_symbolic_allele) or VT_SV
 *   - alleles containing '[' or ']' are breakends and become VT_SV
 *   - "." or an allele identical to REF marks the record as VT_REF
 *   - explicit sequences are trimmed on the right and then on the left, and
 *     described as any combination of VT_SNP, VT_MNP, VT_INDEL and VT_CLUMPED
 *
 * One Allele entry is appended to alleles for every allele that is not a
 * non-variant allele, carrying that allele's own type.
 *
 * @h - VCF header
 * @v - VCF record
 *
 * @return the bitwise OR of the types of all alleles; this is also stored in
 *         the type member.
 */
int32_t Variant::classify(bcf_hdr_t *h, bcf1_t *v)
{
    clear();

    this->h = h;
    this->v = v;

    bcf_unpack(v, BCF_UN_STR);
    chrom.assign(bcf_get_chrom(h, v));
    rid = bcf_get_rid(v);
    pos1 = bcf_get_pos1(v);
    end1 = bcf_get_end1(v);
    char** allele = bcf_get_allele(v);
    const int32_t n_allele = bcf_get_n_allele(v);

    bool homogeneous_length = true;
    const char* ref = allele[0];
    const int32_t rlen = static_cast<int32_t>(strlen(ref));

    if (strchr(ref, 'N'))
    {
        contains_N = true;
    }

    //if only ref allele, skip this entire for loop
    for (int32_t i=1; i<n_allele; ++i)
    {
        int32_t allele_type = VT_REF;

        //check for symbolic alternative alleles
        if (strchr(allele[i],'<'))
        {
            if (vt_is_vntr_symbolic_allele(allele[i]))
            {
                allele_type = VT_VNTR;
                type |= allele_type;
                alleles.push_back(Allele(allele_type));
            }
            else
            {
                allele_type = VT_SV;
                type |= allele_type;
                std::string sv_type(allele[i]);
                alleles.push_back(Allele(allele_type, sv_type));
            }
        }
        //checks for chromosomal breakpoints
        else if (strchr(allele[i],'[')||strchr(allele[i],']'))
        {
            allele_type = VT_SV;
            type |= allele_type;
            std::string sv_type("<BND>");
            alleles.push_back(Allele(allele_type, sv_type));
        }
        //non variant record
        else if (allele[i][0]=='.' || strcmp(allele[i],allele[0])==0)
        {
            //NOTE(review): this discards whatever earlier alleles contributed
            //to type and adds no Allele entry - confirm this is intended for
            //multiallelic records.
            type = VT_REF;
        }
        //explicit sequence of bases
        else
        {
            //views into the allele strings; trimming only moves the start
            //pointer and shortens the length, so no copies are required
            const char* ref_seq = allele[0];
            const char* alt_seq = allele[i];
            const int32_t alen = static_cast<int32_t>(strlen(alt_seq));

            if (strchr(alt_seq, 'N'))
            {
                contains_N = true;
            }

            if (rlen!=alen)
            {
                homogeneous_length = false;
            }

            //trimming
            //this is required in particular for the
            //characterization of multiallelics and
            //in general, any unnormalized variant
            int32_t rl = rlen;
            int32_t al = alen;

            //trim right, always leaving at least one base on each side
            while (rl>1 && al>1 && ref_seq[rl-1]==alt_seq[al-1])
            {
                --rl;
                --al;
            }

            //trim left, always leaving at least one base on each side
            while (rl>1 && al>1 && ref_seq[0]==alt_seq[0])
            {
                ++ref_seq;
                ++alt_seq;
                --rl;
                --al;
            }

            const int32_t mlen = std::min(rl, al);
            const int32_t dlen = al-rl;
            int32_t diff = 0;
            int32_t n_ts = 0;
            int32_t n_tv = 0;

            if (mlen==1 && dlen)
            {
                //one side is a single base: it counts as substituted only
                //if it matches neither end of the longer allele
                const char* longer = rl>al ? ref_seq : alt_seq;
                const int32_t longer_len = rl>al ? rl : al;
                const char ls = longer[0];
                const char le = longer[longer_len-1];
                const char ss = rl>al ? alt_seq[0] : ref_seq[0];

                if (ls!=ss && le!=ss)
                {
                    ++diff;

                    if (vt_is_transition(ls, ss))
                    {
                        ++n_ts;
                    }
                    else
                    {
                        ++n_tv;
                    }
                }
            }
            else
            {
                for (int32_t j=0; j<mlen; ++j)
                {
                    if (ref_seq[j]!=alt_seq[j])
                    {
                        ++diff;

                        if (vt_is_transition(ref_seq[j], alt_seq[j]))
                        {
                            ++n_ts;
                        }
                        else
                        {
                            ++n_tv;
                        }
                    }
                }
            }

            //substitution variants
            if (mlen==diff)
            {
                allele_type |= mlen==1 ? VT_SNP : VT_MNP;
            }

            //indel variants
            if (dlen)
            {
                allele_type |= VT_INDEL;
            }

            //clumped SNPs and MNPs
            if (diff && diff < mlen) //internal gaps
            {
                allele_type |= VT_CLUMPED;
            }

            type |= allele_type;

            //record this allele's own type, as the other branches do, rather
            //than the type accumulated over all alleles seen so far
            alleles.push_back(Allele(allele_type, diff, alen, dlen, mlen, n_ts, n_tv));

            //NOTE(review): the original followed this with "ts += ts; tv += tv;"
            //on the local counters, which had no effect. It presumably meant
            //to accumulate into per-variant totals - confirm and restore
            //against the class definition.
            ins = dlen>0?1:0;
            del = dlen<0?1:0;
        }
    }

    if (type==VT_VNTR)
    {
        update_vntr_from_info_fields(h, v);
    }

    //additionally define MNPs by length of all alleles
    if (!(type&(VT_VNTR|VT_SV)) && type!=VT_REF)
    {
        if (homogeneous_length && rlen>1 && n_allele>1)
        {
            type |= VT_MNP;
        }
    }

    return type;
}
Exemple #5
0
/**
 * Annotates VNTR characteristics.
 *
 * VNTR records are reannotated from the reference; this is mostly useful for
 * checking concordance with VNTRs from other sources. Records carrying an
 * indel are annotated according to mode. Anything else is left untouched
 * apart from variant.rid and variant.pos1 being refreshed.
 *
 * @h       - VCF header
 * @v       - VCF record
 * @variant - variant to annotate
 * @mode    - "e" for exact annotation, "f" for fuzzy annotation;
 *            any other value performs no indel annotation
 */
void VNTRAnnotator::annotate(bcf_hdr_t* h, bcf1_t* v, Variant& variant, std::string mode)
{
    //update chromosome and position
    variant.rid = bcf_get_rid(v);
    variant.pos1 = bcf_get_pos1(v);

    //reannotation of an existing VNTR record
    if (variant.type==VT_VNTR)
    {
        if (debug) std::cerr << "ANNOTATING VNTR/STR \n";

        //1. pick candidate region
        cre->pick_candidate_region(h, v, variant, REFERENCE);

        //2. detect candidate motifs from a reference sequence
        cmp->generate_candidate_motifs(h, v, variant);
        cmp->next_motif(h, v, variant);

        return;
    }

    //main purpose - annotation of Indels
    if (!(variant.type&VT_INDEL))
    {
        return;
    }

    const bool exact_mode = (mode=="e");
    const bool fuzzy_mode = !exact_mode && (mode=="f");

    if (!exact_mode && !fuzzy_mode)
    {
        return;
    }

    //the basic steps in annotating a TR
    //
    //1. extract a region that has a chance of containing the repeat units
    //2. choose a set of candidate motifs and pick motif
    //3. detect repeat region and evaluate
    //4. iterate 2 and 3
    if (debug)
    {
        std::cerr << "============================================\n";

        if (exact_mode)
        {
            std::cerr << "ANNOTATING INDEL EXACTLY\n";
        }
        else
        {
            std::cerr << "ANNOTATING INDEL FUZZILY\n";
        }
    }

    //both modes start from the region found by exact left and right alignment
    cre->pick_candidate_region(h, v, variant, EXACT_LEFT_RIGHT_ALIGNMENT);

    if (exact_mode)
    {
        //evaluate reference length
        fd->detect_flanks(h, v, variant, CLIP_ENDS);
    }
    else
    {
        //detect candidate motifs from a reference sequence
        cmp->generate_candidate_motifs(h, v, variant);

        const bool have_motif = cmp->next_motif(h, v, variant);
        if (!have_motif)
        {
            std::cerr << "oops, no candidate motif for next step\n";
        }

        //evaluate reference length
        fd->detect_flanks(h, v, variant, FRAHMM);

        //introduce reiteration based on concordance and exact concordance.
    }

    if (debug) std::cerr << "============================================\n";
}
Exemple #6
0
/**
 * Gets records for the most recent position and fills up the buffer from file i.
 * returns true if buffer is filled or it is not necessary to fill buffer.
 * returns false if no more records are found to fill buffer
 */
void BCFSyncedReader::fill_buffer(int32_t i)
{
    if (buffer[i].size()>=2)
        return;

    if (random_access)
    {
        int32_t pos1 = buffer[i].size()==0 ? 0 : bcf_get_pos1(buffer[i].front());

        if (ftypes[i].format==bcf)
        {
            bcf1_t *v = get_bcf1_from_pool();
            bool populated = false;

            while (itrs[i] && bcf_itr_next(files[i], itrs[i], v)>=0)
            {
                populated = true;
                bcf_unpack(v, BCF_UN_STR);
                
                //check to ensure order
                if (!buffer[i].empty())
                {
                    if (!bcf_is_in_order(buffer[i].back(), v))
                    {
                        fprintf(stderr, "[E:%s:%d %s] VCF file not in order: %s\n", __FILE__, __LINE__, __FUNCTION__, file_names[i].c_str());
                        exit(1);
                    }
                }
                
                buffer[i].push_back(v);
                insert_into_pq(i, v);

                if (pos1==0)
                {
                    pos1 = bcf_get_pos1(v);
                }

                if (bcf_get_pos1(v)!=pos1)
                {
                    break;
                }

                v = get_bcf1_from_pool();
                populated = false;
            }

            if (!populated)
                store_bcf1_into_pool(v);
        }
        else if (ftypes[i].format==vcf)
        {
            while (itrs[i] && tbx_itr_next(files[i], tbxs[i], itrs[i], &s)>=0)
            {
                bcf1_t *v = get_bcf1_from_pool();
                vcf_parse(&s, hdrs[i], v);

                bcf_unpack(v, BCF_UN_STR);
                
                //check to ensure order
                if (!buffer[i].empty())
                {
                    if (!bcf_is_in_order(buffer[i].back(), v))
                    {
                        fprintf(stderr, "[E:%s:%d %s] VCF file not in order: %s\n", __FILE__, __LINE__, __FUNCTION__, file_names[i].c_str());
                        exit(1);
                    }
                }
                
                buffer[i].push_back(v);
                insert_into_pq(i, v);

                if (pos1==0)
                {
                    pos1 = bcf_get_pos1(v);
                }

                if (bcf_get_pos1(v)!=pos1)
                {
                    break;
                }
            }
        }
    }
    else
    {
        int32_t rid = buffer[i].size()==0 ? -1 : bcf_get_rid(buffer[i].front());
        int32_t pos1 = buffer[i].size()==0 ? 0 : bcf_get_pos1(buffer[i].front());

        bcf1_t *v = get_bcf1_from_pool();
        bool populated = false;

        while (bcf_read(files[i], hdrs[i], v)>=0)
        {
            populated = true;
            bcf_unpack(v, BCF_UN_STR);
            
            //check to ensure order
            if (!buffer[i].empty())
            {
                if (!bcf_is_in_order(buffer[i].back(), v))
                {
                    fprintf(stderr, "[E:%s:%d %s] VCF file not in order: %s\n", __FILE__, __LINE__, __FUNCTION__, file_names[i].c_str());
                    exit(1);
                }
            }
            
            buffer[i].push_back(v);
            insert_into_pq(i, v);

            if (rid==-1)
            {
                rid = bcf_get_rid(v);
                pos1 = bcf_get_pos1(v);
            }

            if (bcf_get_rid(v)!=rid || bcf_get_pos1(v)!=pos1)
            {
                break;
            }

            v = get_bcf1_from_pool();
            populated = false;
        }

        if (!populated)
            store_bcf1_into_pool(v);
    }
}
Exemple #7
0
/**
 * Inserts a record into pq.
 */
void BCFSyncedReader::insert_into_pq(int32_t i, bcf1_t *v)
{
    pq.push(new bcfptr(i, bcf_get_rid(v), bcf_get_pos1(v), hdrs[i], v, sync_by_pos));
}
Exemple #8
0
/**
 * Constructor.
 * @v - VCF record.
 */
GenotypingRecord::GenotypingRecord(bcf_hdr_t *h, bcf1_t *v, int32_t vtype)
{
    clear();
    this->h = h;
    this->v = v;
    rid = bcf_get_rid(v);
    pos1 = bcf_get_pos1(v);
    this->vtype = vtype;
    int32_t n_allele = bcf_get_n_allele(v);
    
    if (vtype==VT_SNP && n_allele==2)
    {
        rid = bcf_get_rid(v);
        beg1 = bcf_get_pos1(v);
        end1 = beg1;
    }
    else if (vtype==VT_INDEL && bcf_get_n_allele(v)==2)
    {
        rid = bcf_get_rid(v);
        char** alleles = bcf_get_allele(v);
        dlen = strlen(alleles[1])-strlen(alleles[0]);
        len = abs(dlen);

        int32_t *flanks = NULL;
        int32_t n = 0;
        if (bcf_get_info_int32(h, v, "FLANKS", &flanks, &n)>0)
        {
            lend1 = flanks[0];
            rbeg1 = flanks[1];
            free(flanks);
        }
        else
        {
            lend1 = bcf_get_pos1(v) - 1;
            rbeg1 = bcf_get_end_pos1(v) + 1;
        }

        int32_t *fuzzy_flanks = NULL;
        n = 0;
        if (bcf_get_info_int32(h, v, "FZ_FLANKS", &fuzzy_flanks, &n)>0)
        {
            fuzzy_lend1 = fuzzy_flanks[0];
            fuzzy_rbeg1 = fuzzy_flanks[1];
            free(fuzzy_flanks);
        }
        else
        {
            fuzzy_lend1 = bcf_get_pos1(v) - 1;
            fuzzy_rbeg1 = bcf_get_end_pos1(v) + 1;
        }

        beg1 = std::min(lend1-2, fuzzy_lend1-2);
        end1 = std::max(rbeg1+2, fuzzy_rbeg1+2);
    
        //construct alleles
        
        //get reference sequence
//        char* ref_seq = NULL;
//        int32_t ref_len = 0;
////        ref_seq = faidx_fetch_seq(fai, bcf_get_chrom(h,v), lend1+1-1, rbeg1-1-1, &ref_len);
//        
//        for (uint32_t i=0; i<n_allele; ++i)
//        {
//            
//        }
        
//        for ()
//        {
//        }
//    
        if (dlen>0)
        {
            indel.append(&alleles[1][1]);
        }
        else
        {
            indel.append(&alleles[0][1]);
        }
    }
    else if (vtype==VT_VNTR)
    {
        rid = bcf_get_rid(v);
        beg1 = bcf_get_pos1(v) - 1;
        end1 = bcf_get_end_pos1(v) + 1;
        
        char *motif = NULL;
        int32_t n = 0;
        
        if (bcf_get_info_string(h, v, "MOTIF", &motif, &n)>0)
        {
           this->motif.assign(motif);
           free(motif);
        }
    }
}