Example #1
0
/**
 * Writes a comma separated list of the alleles of a variant into var.
 * Allele 0 is always written first; the remaining alleles follow in the
 * order produced by sorting them with cmpstr.
 *
 * @h   - VCF header (not used by this function)
 * @v   - VCF record, unpacked up to BCF_UN_STR by this call
 * @var - output string, its length is reset to 0 before writing
 */
void bcf_alleles2string_sorted(bcf_hdr_t *h, bcf1_t *v, kstring_t *var)
{
    bcf_unpack(v, BCF_UN_STR);
    var->l = 0;

    //exactly two alleles: nothing to sort, write them as they are
    if (v->n_allele==2)
    {
        kputs(bcf_get_alt(v, 0), var);
        kputc(',', var);
        kputs(bcf_get_alt(v, 1), var);
        return;
    }

    //collect pointers to alleles 1..n-1 in a scratch array
    char** all_alleles = bcf_get_allele(v);
    char** sorted_tail = (char**) malloc((bcf_get_n_allele(v)-1)*sizeof(char*));
    int32_t src = 1;
    while (src<v->n_allele)
    {
        sorted_tail[src-1] = all_alleles[src];
        ++src;
    }

    //only the pointers are reordered, the allele strings stay in the record
    std::qsort(sorted_tail, bcf_get_n_allele(v)-1, sizeof(char*), cmpstr);

    kputs(bcf_get_alt(v, 0), var);
    for (int32_t k=0; k<v->n_allele-1; ++k)
    {
        kputc(',', var);
        kputs(sorted_tail[k], var);
    }

    free(sorted_tail);
}
Example #2
0
/**
 * Generates candidate motifs for a variant.
 *
 * The sequence handed to the motif tree (mt) depends on variant.ins:
 *   - insertion : allele 1 followed by the part of the exact repeat tract
 *                 that lies beyond the length of allele 0
 *   - otherwise : the exact repeat tract itself
 *
 * indel_sequence is set to the relevant allele without its first base
 * (allele 1 for insertions, allele 0 otherwise).
 *
 * @h       - VCF header (not used by this function)
 * @v       - VCF record supplying the alleles
 * @variant - variant whose vntr.exact_repeat_tract is read
 */
void CandidateMotifPicker::generate_candidate_motifs(bcf_hdr_t* h, bcf1_t* v, Variant& variant)
{
    if (debug)
    {
        std::cerr << "********************************************\n";
        std::cerr << "PICK CANDIDATE MOTIFS\n\n";
    }

    char** alleles = bcf_get_allele(v);
    std::string& tract = variant.vntr.exact_repeat_tract;

    if (!variant.ins)
    {
        mt->detect_candidate_motifs(tract);
        indel_sequence.assign(&alleles[0][1]);
    }
    else
    {
        if (debug)
        {
            std::cerr << "Longest Allele : "   << alleles[0][0] << "[" <<  (alleles[1]+1)  << "]" << (tract.c_str()+1) << "\n";
        }

        //spike in inserted allele: allele 1 plus the tract beyond allele 0
        const size_t ref_len = strlen(alleles[0]);
        std::string spiked_seq(alleles[1]);
        spiked_seq += tract.substr(ref_len);
        mt->detect_candidate_motifs(spiked_seq);

        indel_sequence.assign(&alleles[1][1]);
    }

    if (debug)
    {
        std::cerr << "Indel : "  << indel_sequence << "\n";
    }
}
/**
 * Picks the candidate region of a variant.
 *
 * @h       - VCF header
 * @v       - VCF record
 * @variant - variant whose vntr member receives the region
 * @mode    - REFERENCE                   use the reference field of the record
 *          - EXACT_LEFT_RIGHT_ALIGNMENT  delegate to extract_regions_by_exact_alignment()
 *          - FUZZY_LEFT_RIGHT_ALIGNMENT  delegate to extract_regions_by_fuzzy_alignment()
 *          Any other value leaves variant untouched.
 */
void CandidateRegionExtractor::pick_candidate_region(bcf_hdr_t* h, bcf1_t* v, Variant& variant, uint32_t mode)
{
    if (mode==REFERENCE)
    {
        VNTR& region = variant.vntr;
        char** alleles = bcf_get_allele(v);

        region.exact_repeat_tract.assign(bcf_get_ref(v));
        region.exact_rbeg1 = bcf_get_pos1(v);
        //NOTE(review): exact_rend1 receives the length of allele 0, not
        //pos1 + length - 1 - confirm this is the intended meaning of rend1.
        region.exact_rend1 = strlen(alleles[0]);

        //the fuzzy region mirrors the exact one in this mode
        region.fuzzy_rbeg1 = region.exact_rbeg1;
        region.fuzzy_rend1 = region.exact_rend1;
        return;
    }

    if (mode==EXACT_LEFT_RIGHT_ALIGNMENT)
    {
        extract_regions_by_exact_alignment(h, v, variant);
        return;
    }

    if (mode==FUZZY_LEFT_RIGHT_ALIGNMENT)
    {
        extract_regions_by_fuzzy_alignment(h, v, variant);
    }
}
Example #4
0
/**
 * Classifies variants.
 *
 * Every ALT allele of the record is examined in turn and its type is
 * OR-ed into var.type:
 *   - alleles containing '<' are symbolic and become VT_VNTR or VT_SV
 *   - alleles starting with '.' or identical to REF contribute nothing
 *   - all other alleles are trimmed against REF and flagged with
 *     VT_SNP / VT_MNP / VT_INDEL / VT_CLUMPED as appropriate
 *
 * @h   - VCF header (not read by this function)
 * @v   - VCF record, unpacked up to BCF_UN_STR (and BCF_UN_INFO when the
 *        record turns out to be purely VT_VNTR)
 * @var - variant object, cleared and then populated
 *
 * Returns var.type after classification.
 */
int32_t VariantManip::classify_variant(bcf_hdr_t *h, bcf1_t *v, Variant& var)
{
    bcf_unpack(v, BCF_UN_STR);
    char** allele = bcf_get_allele(v);
    int32_t n_allele = bcf_get_n_allele(v);

    var.ts = 0;
    var.tv = 0;
    var.ins = 0;
    var.del = 0;

    var.clear(); // this sets the type to VT_REF by default.

    bool homogeneous_length = true;

    int32_t rlen = strlen(allele[0]);

    //if only ref allele, skip this entire for loop
    for (int32_t i=1; i<n_allele; ++i)
    {
        int32_t type = VT_REF;

        //check for tags
        if (strchr(allele[i],'<'))
        {
            size_t len = strlen(allele[i]);
            if (len>=5)
            {
                //NOTE(review): in the two prefix branches below any non-digit
                //character after the prefix marks the tag as VT_VNTR, so
                //"<VNTR>" and "<STR>" are already caught by them and purely
                //numeric tags such as "<VN12>" end up as VT_SV - confirm
                //that this polarity is intended.

                //VN/d+
                if (allele[i][0]=='<' && allele[i][1]=='V' && allele[i][2]=='N' && allele[i][len-1]=='>' )
                {
                    for (size_t j=3; j<len-1; ++j)
                    {
                        if (allele[i][j]<'0' || allele[i][j]>'9')
                        {
                            type = VT_VNTR;
                        }
                    }
                }
                //VNTR
                else if (len==6 &&
                         allele[i][0]=='<' &&
                         allele[i][1]=='V' && allele[i][2]=='N' && allele[i][3]=='T' && allele[i][4]=='R' &&
                         allele[i][5]=='>' )
                {
                     type = VT_VNTR;
                }
                //ST/d+
                else if (allele[i][0]=='<' && allele[i][1]=='S' && allele[i][2]=='T' && allele[i][len-1]=='>' )
                {
                    for (size_t j=3; j<len-1; ++j)
                    {
                        if (allele[i][j]<'0' || allele[i][j]>'9')
                        {
                            type = VT_VNTR;
                        }
                    }
                }
                //STR
                else if (len==5 &&
                         allele[i][0]=='<' &&
                         allele[i][1]=='S' && allele[i][2]=='T' && allele[i][3]=='R' &&
                         allele[i][4]=='>' )
                {
                     type = VT_VNTR;
                }
            }

            if (type==VT_VNTR)
            {
                var.type |= type;
                var.alleles.push_back(Allele(type));
            }
            else
            {
                //every other symbolic allele is recorded as a structural variant
                type = VT_SV;
                var.type |= type;
                std::string sv_type(allele[i]);
                var.alleles.push_back(Allele(type, sv_type));
            }
        }
        else if (allele[i][0]=='.' || strcmp(allele[i],allele[0])==0)
        {
            //non variant allele, nothing is recorded for it
            type = VT_REF;
        }
        else
        {
            const char* ref = allele[0];
            const char* alt = allele[i];
            int32_t alen = strlen(alt);

            if (rlen!=alen)
            {
                homogeneous_length = false;
            }

            //trimming
            //this is required in particular for the
            //characterization of multiallelics and
            //in general, any unnormalized variant
            int32_t rl = rlen;
            int32_t al = alen;
            //trim right
            while (rl!=1 && al!=1)
            {
                if (ref[rl-1]==alt[al-1])
                {
                    --rl;
                    --al;
                }
                else
                {
                    break;
                }
            }

            //trim left
            while (rl !=1 && al!=1)
            {
                if (ref[0]==alt[0])
                {
                    ++ref;
                    ++alt;
                    --rl;
                    --al;
                }
                else
                {
                    break;
                }
            }

            //ref/alt now address the trimmed alleles in place; only the
            //first rl/al characters are read, so no copy is required

            int32_t mlen = std::min(rl, al);
            int32_t dlen = al-rl;
            int32_t diff = 0;
            int32_t ts = 0;
            int32_t tv = 0;

            if (mlen==1 && dlen)
            {
                //one allele is a single base: compare it against both ends
                //of the longer allele
                char ls, le, ss;

                if (rl>al)
                {
                     ls = ref[0];
                     le = ref[rl-1];
                     ss = alt[0];
                }
                else
                {
                     ls = alt[0];
                     le = alt[al-1];
                     ss = ref[0];
                }

                if (ls!=ss && le!=ss)
                {
                    ++diff;

                    if ((ls=='G' && ss=='A') ||
                        (ls=='A' && ss=='G') ||
                        (ls=='C' && ss=='T') ||
                        (ls=='T' && ss=='C'))
                    {
                        ++ts;
                    }
                    else
                    {
                        ++tv;
                    }
                }
            }
            else
            {
                //position wise comparison over the shared length
                for (int32_t j=0; j<mlen; ++j)
                {
                    if (ref[j]!=alt[j])
                    {
                        ++diff;

                        if ((ref[j]=='G' && alt[j]=='A') ||
                            (ref[j]=='A' && alt[j]=='G') ||
                            (ref[j]=='C' && alt[j]=='T') ||
                            (ref[j]=='T' && alt[j]=='C'))
                        {
                            ++ts;
                        }
                        else
                        {
                            ++tv;
                        }
                    }
                }
            }

            //substitution variants
            if (mlen==diff)
            {
                type |= mlen==1 ? VT_SNP : VT_MNP;
            }

            //indel variants
            if (dlen)
            {
                type |= VT_INDEL;
            }

            //clumped SNPs and MNPs
            if (diff && diff < mlen) //internal gaps
            {
                type |= VT_CLUMPED;
            }

            var.type |= type;
            var.alleles.push_back(Allele(type, diff, alen, dlen, mlen, ts, tv));
            var.ts += ts;
            var.tv += tv;
            //NOTE(review): ins/del are overwritten per allele, so they
            //reflect only the last sequence allele processed - confirm.
            var.ins = dlen>0?1:0;
            var.del = dlen<0?1:0;
        }
    }

    if (var.type==VT_VNTR)
    {
        bcf_unpack(v, BCF_UN_INFO);

        //populate motif, motif len etc. etc.
//        char* str = NULL;
//        int32_t n = 0;
//        int32_t ret = bcf_get_info_string(h, v, "MOTIF", &str, &n);
//        if (ret>0)
//        {
//            var.motif = std::string(str);
//            var.mlen = var.motif.size();
//        }
//        ret = bcf_get_info_string(h, v, "RU", &str, &n);
//        if (ret>0)
//        {
//            var.ru = std::string(str);
//            var.mlen = var.ru.size();
//        }
//        if (n) free(str);
//
//        int32_t* no = NULL;
//        n = 0;
//        ret = bcf_get_info_int32(h, v, "RL", &no, &n);
//        if (ret>0) var.rlen = *no;
//        if (n) free(no);
//
//        int32_t* fl = NULL;
//        n = 0;
//        ret = bcf_get_info_int32(h, v, "REF", &fl, &n);
//        if (ret>0) var.rcn = *fl;
//        if (n) free(fl);
    }

    //additionally define MNPs by length of all alleles
    if (!(var.type&(VT_VNTR|VT_SV)) && var.type!=VT_REF)
    {
        if (homogeneous_length && rlen>1 && n_allele>1)
        {
            var.type |= VT_MNP;
        }
    }

    return var.type;
}
Example #5
0
/**
 * Checks if a variant is normalized.
 *
 * A record counts as normalized when it has a single allele, when all
 * its alleles are identical (reference entry) or, for two alleles, when
 * both are one base long.  Otherwise it is NOT normalized if
 *   - all alleles end in the same base, or
 *   - no allele has length one and all alleles start with the same base.
 *
 * @v - VCF record
 */
bool VariantManip::is_normalized(bcf1_t *v)
{
    char** seqs = bcf_get_allele(v);
    const int32_t n_allele = bcf_get_n_allele(v);

    if (n_allele==1)
    {
        return true;
    }

    bool has_single_base_allele = false;
    bool shared_first_base = true;
    bool shared_last_base = true;

    if (n_allele==2)
    {
        const char* ref = seqs[0];
        const char* alt = seqs[1];
        const size_t ref_len = strlen(ref);
        const size_t alt_len = strlen(alt);

        //single base substitution
        if (ref_len==1 && alt_len==1)
        {
            return true;
        }

        //check if variant is reference.
        if (ref_len==alt_len && strcmp(ref, alt)==0)
        {
            return true;
        }

        has_single_base_allele = (ref_len==1 || alt_len==1);
        shared_first_base = (ref[0]==alt[0]);
        shared_last_base = (ref[ref_len-1]==alt[alt_len-1]);
    }
    else
    {
        bool all_match_ref = true;
        char ref_first = 0;
        char ref_last = 0;
        const size_t total = static_cast<size_t>(n_allele);

        for (size_t k=0; k<total; ++k)
        {
            const char* seq = seqs[k];
            const size_t seq_len = strlen(seq);

            if (seq_len==1)
            {
                has_single_base_allele = true;
            }

            //the first allele only supplies the bases to compare against
            if (k==0)
            {
                ref_first = seq[0];
                ref_last = seq[seq_len-1];
                continue;
            }

            if (seq[0]!=ref_first) shared_first_base = false;
            if (seq[seq_len-1]!=ref_last) shared_last_base = false;

            all_match_ref = all_match_ref && strcmp(seq, seqs[0])==0;
        }

        //reference entry
        if (all_match_ref)
        {
            return true;
        }
    }

    return !(shared_last_base || (!has_single_base_allele && shared_first_base));
}
Example #6
0
File: variant.cpp Project: atks/vt
/**
 * Updates VNTR related information from INFO fields.
 *
 * Reads three parallel groups of INFO tags from the record held in the
 * members h and v and stores them in vntr: the untagged group, the exact
 * group (EX_ prefix) and the fuzzy group (FZ_ prefix).
 *
 * NOTE(review): the results of bcf_get_info_int_vec() are indexed
 * directly, which assumes the helper always returns at least the
 * requested number of entries (2 or 4) - confirm against the helper.
 */
void Variant::update_vntr_from_info_fields()
{
    //final VNTR annotation
    vntr.motif = bcf_get_info_str(h, v, "MOTIF");
    vntr.ru = bcf_get_info_str(h, v, "RU");
    vntr.basis = bcf_get_info_str(h, v, "BASIS");
    //derive the basis from the motif when the BASIS tag yields nothing
    if (vntr.basis=="") vntr.basis = VNTR::get_basis(vntr.motif);
    vntr.mlen = vntr.motif.size();
    vntr.blen = (int32_t) vntr.basis.size();
    std::vector<int32_t> i_vec = bcf_get_info_int_vec(h, v, "REPEAT_TRACT", 2, 0);
    vntr.beg1 = i_vec[0];
    vntr.end1 = i_vec[1];
    i_vec = bcf_get_info_int_vec(h, v, "COMP", 4, 0);
    vntr.comp[0] = i_vec[0];
    vntr.comp[1] = i_vec[1];
    vntr.comp[2] = i_vec[2];
    vntr.comp[3] = i_vec[3];
    vntr.entropy = bcf_get_info_flt(h, v, "ENTROPY");
    vntr.entropy2 = bcf_get_info_flt(h, v, "ENTROPY2");
    vntr.kl_divergence = bcf_get_info_flt(h, v, "KL_DIVERGENCE");
    vntr.kl_divergence2 = bcf_get_info_flt(h, v, "KL_DIVERGENCE2");
    vntr.rl = bcf_get_info_int(h, v, "RL");
    vntr.ll = bcf_get_info_int(h, v, "LL");
    i_vec = bcf_get_info_int_vec(h, v, "RU_COUNTS", 2, 0);
    vntr.no_perfect_ru = i_vec[0];
    vntr.no_ru = i_vec[1];
    vntr.score = bcf_get_info_flt(h, v, "SCORE");
    vntr.trf_score = bcf_get_info_int(h, v, "TRF_SCORE");

    //exact VNTR annotation (EX_ tags)
    vntr.exact_motif = bcf_get_info_str(h, v, "EX_MOTIF");
    vntr.exact_ru = bcf_get_info_str(h, v, "EX_RU");
    vntr.exact_basis = bcf_get_info_str(h, v, "EX_BASIS");
    vntr.exact_mlen = (int32_t) vntr.exact_motif.size();
    vntr.exact_blen = (int32_t) vntr.exact_basis.size();
    i_vec = bcf_get_info_int_vec(h, v, "EX_REPEAT_TRACT", 2, 0);
    vntr.exact_beg1 = i_vec[0];
    vntr.exact_end1 = i_vec[1];
    i_vec = bcf_get_info_int_vec(h, v, "EX_COMP", 4, 0);
    vntr.exact_comp[0] = i_vec[0];
    vntr.exact_comp[1] = i_vec[1];
    vntr.exact_comp[2] = i_vec[2];
    vntr.exact_comp[3] = i_vec[3];
    vntr.exact_entropy = bcf_get_info_flt(h, v, "EX_ENTROPY");
    vntr.exact_entropy2 = bcf_get_info_flt(h, v, "EX_ENTROPY2");
    vntr.exact_kl_divergence = bcf_get_info_flt(h, v, "EX_KL_DIVERGENCE");
    vntr.exact_kl_divergence2 = bcf_get_info_flt(h, v, "EX_KL_DIVERGENCE2");
    vntr.exact_rl = bcf_get_info_int(h, v, "EX_RL");
    vntr.exact_ll = bcf_get_info_int(h, v, "EX_LL");
    i_vec = bcf_get_info_int_vec(h, v, "EX_RU_COUNTS", 2, 0);
    vntr.exact_no_perfect_ru = i_vec[0];
    vntr.exact_no_ru = i_vec[1];
    vntr.exact_score = bcf_get_info_flt(h, v, "EX_SCORE");
    vntr.exact_trf_score = bcf_get_info_int(h, v, "EX_TRF_SCORE");

    //fuzzy VNTR annotation (FZ_ tags)
    vntr.fuzzy_motif = bcf_get_info_str(h, v, "FZ_MOTIF");
    vntr.fuzzy_ru = bcf_get_info_str(h, v, "FZ_RU");
    vntr.fuzzy_basis = bcf_get_info_str(h, v, "FZ_BASIS");
    vntr.fuzzy_mlen = (int32_t) vntr.fuzzy_motif.size();
    vntr.fuzzy_blen = (int32_t) vntr.fuzzy_basis.size();
    i_vec = bcf_get_info_int_vec(h, v, "FZ_REPEAT_TRACT", 2, 0);
    vntr.fuzzy_beg1 = i_vec[0];
    vntr.fuzzy_end1 = i_vec[1];
    i_vec = bcf_get_info_int_vec(h, v, "FZ_COMP", 4, 0);
    vntr.fuzzy_comp[0] = i_vec[0];
    vntr.fuzzy_comp[1] = i_vec[1];
    vntr.fuzzy_comp[2] = i_vec[2];
    vntr.fuzzy_comp[3] = i_vec[3];
    vntr.fuzzy_entropy = bcf_get_info_flt(h, v, "FZ_ENTROPY");
    vntr.fuzzy_entropy2 = bcf_get_info_flt(h, v, "FZ_ENTROPY2");
    vntr.fuzzy_kl_divergence = bcf_get_info_flt(h, v, "FZ_KL_DIVERGENCE");
    vntr.fuzzy_kl_divergence2 = bcf_get_info_flt(h, v, "FZ_KL_DIVERGENCE2");
    vntr.fuzzy_rl = bcf_get_info_int(h, v, "FZ_RL");
    vntr.fuzzy_ll = bcf_get_info_int(h, v, "FZ_LL");
    i_vec = bcf_get_info_int_vec(h, v, "FZ_RU_COUNTS", 2, 0);
    vntr.fuzzy_no_perfect_ru = i_vec[0];
    vntr.fuzzy_no_ru = i_vec[1];
    vntr.fuzzy_score = bcf_get_info_flt(h, v, "FZ_SCORE");
    vntr.fuzzy_trf_score = bcf_get_info_int(h, v, "FZ_TRF_SCORE");
}
Example #7
0
File: variant.cpp Project: atks/vt
/**
 * Classifies variants.
 *
 * Clears this object, records the header/record pointers and the basic
 * coordinates, then examines every ALT allele and ORs its type into the
 * member type:
 *   - alleles containing '<' are symbolic and become VT_VNTR or VT_SV
 *   - alleles containing '[' or ']' are breakpoints, recorded as VT_SV
 *     with the label "<BND>"
 *   - alleles starting with '.' or identical to REF contribute nothing
 *   - all other alleles are trimmed against REF and flagged with
 *     VT_SNP / VT_MNP / VT_INDEL / VT_CLUMPED as appropriate
 *
 * @h - VCF header
 * @v - VCF record, unpacked up to BCF_UN_STR by this call
 *
 * Returns the accumulated type of the record.
 */
int32_t Variant::classify(bcf_hdr_t *h, bcf1_t *v)
{
    clear();

    this->h = h;
    this->v = v;

    bcf_unpack(v, BCF_UN_STR);
    chrom.assign(bcf_get_chrom(h, v));
    rid = bcf_get_rid(v);
    pos1 = bcf_get_pos1(v);
    end1 = bcf_get_end1(v);
    char** allele = bcf_get_allele(v);
    int32_t n_allele = bcf_get_n_allele(v);

    bool homogeneous_length = true;
    int32_t rlen = strlen(allele[0]);

    if (strchr(allele[0], 'N'))
    {
        contains_N = true;
    }

    //if only ref allele, skip this entire for loop
    for (int32_t i=1; i<n_allele; ++i)
    {
        int32_t allele_type = VT_REF;

        //check for symbolic alternative alleles
        if (strchr(allele[i],'<'))
        {
            size_t len = strlen(allele[i]);
            if (len>=5)
            {
                //VN/d+
                //NOTE(review): here a non-digit suffix marks the tag as
                //VT_VNTR whereas the ST/d+ branch below treats a non-digit
                //suffix as VT_SV - confirm which polarity is intended.
                if (allele[i][0]=='<' && allele[i][1]=='V' && allele[i][2]=='N' && allele[i][len-1]=='>' )
                {
                    for (size_t j=3; j<len-1; ++j)
                    {
                        if (allele[i][j]<'0' || allele[i][j]>'9')
                        {
                            allele_type = VT_VNTR;
                        }
                    }
                }
                //VNTR
                else if (len==6 &&
                         allele[i][0]=='<' &&
                         allele[i][1]=='V' && allele[i][2]=='N' && allele[i][3]=='T' && allele[i][4]=='R' &&
                         allele[i][5]=='>' )
                {
                     allele_type = VT_VNTR;
                }
                //STR
                else if (len==5 &&
                         allele[i][0]=='<' &&
                         allele[i][1]=='S' && allele[i][2]=='T' && allele[i][3]=='R' &&
                         allele[i][4]=='>' )
                {
                     allele_type = VT_VNTR;
                }
                //ST/d+
                else if (allele[i][0]=='<' && allele[i][1]=='S' && allele[i][2]=='T' && allele[i][len-1]=='>' )
                {
                    //classify this allele only; the record level type is
                    //accumulated further down and must not be overwritten here
                    allele_type = VT_VNTR;

                    for (size_t j=3; j<len-1; ++j)
                    {
                        if ((allele[i][j]<'0' || allele[i][j]>'9') && allele[i][j]!='.')
                        {
                            allele_type = VT_SV;
                        }
                    }
                }
            }

            if (allele_type==VT_VNTR)
            {
                type |= allele_type;
                alleles.push_back(Allele(allele_type));
            }
            else
            {
                //every other symbolic allele is recorded as a structural variant
                allele_type = VT_SV;
                type |= allele_type;
                std::string sv_type(allele[i]);
                alleles.push_back(Allele(allele_type, sv_type));
            }
        }
        //checks for chromosomal breakpoints
        else if (strchr(allele[i],'[')||strchr(allele[i],']'))
        {
            allele_type = VT_SV;
            type |= allele_type;
            std::string sv_type("<BND>");
            alleles.push_back(Allele(allele_type, sv_type));
        }
        //non variant record
        else if (allele[i][0]=='.' || strcmp(allele[i],allele[0])==0)
        {
            //this allele adds nothing; the types accumulated from earlier
            //alleles are deliberately left untouched
            allele_type = VT_REF;
        }
        //explicit sequence of bases
        else
        {
            const char* ref = allele[0];
            const char* alt = allele[i];
            int32_t alen = strlen(alt);

            if (strchr(alt, 'N'))
            {
                contains_N = true;
            }

            if (rlen!=alen)
            {
                homogeneous_length = false;
            }

            //trimming
            //this is required in particular for the
            //characterization of multiallelics and
            //in general, any unnormalized variant
            int32_t rl = rlen;
            int32_t al = alen;
            //trim right
            while (rl!=1 && al!=1)
            {
                if (ref[rl-1]==alt[al-1])
                {
                    --rl;
                    --al;
                }
                else
                {
                    break;
                }
            }

            //trim left
            while (rl !=1 && al!=1)
            {
                if (ref[0]==alt[0])
                {
                    ++ref;
                    ++alt;
                    --rl;
                    --al;
                }
                else
                {
                    break;
                }
            }

            //ref/alt now address the trimmed alleles in place; only the
            //first rl/al characters are read, so no copy is required

            int32_t mlen = std::min(rl, al);
            int32_t dlen = al-rl;
            int32_t diff = 0;
            //named so that they do not shadow the members ts and tv
            int32_t allele_ts = 0;
            int32_t allele_tv = 0;

            if (mlen==1 && dlen)
            {
                //one allele is a single base: compare it against both ends
                //of the longer allele
                char ls, le, ss;

                if (rl>al)
                {
                     ls = ref[0];
                     le = ref[rl-1];
                     ss = alt[0];
                }
                else
                {
                     ls = alt[0];
                     le = alt[al-1];
                     ss = ref[0];
                }

                if (ls!=ss && le!=ss)
                {
                    ++diff;

                    if ((ls=='G' && ss=='A') ||
                        (ls=='A' && ss=='G') ||
                        (ls=='C' && ss=='T') ||
                        (ls=='T' && ss=='C'))
                    {
                        ++allele_ts;
                    }
                    else
                    {
                        ++allele_tv;
                    }
                }
            }
            else
            {
                //position wise comparison over the shared length
                for (int32_t j=0; j<mlen; ++j)
                {
                    if (ref[j]!=alt[j])
                    {
                        ++diff;

                        if ((ref[j]=='G' && alt[j]=='A') ||
                            (ref[j]=='A' && alt[j]=='G') ||
                            (ref[j]=='C' && alt[j]=='T') ||
                            (ref[j]=='T' && alt[j]=='C'))
                        {
                            ++allele_ts;
                        }
                        else
                        {
                            ++allele_tv;
                        }
                    }
                }
            }

            //substitution variants
            if (mlen==diff)
            {
                allele_type |= mlen==1 ? VT_SNP : VT_MNP;
            }

            //indel variants
            if (dlen)
            {
                allele_type |= VT_INDEL;
            }

            //clumped SNPs and MNPs
            if (diff && diff < mlen) //internal gaps
            {
                allele_type |= VT_CLUMPED;
            }

            type |= allele_type;
            //the allele carries its own type, not the type of the whole record
            alleles.push_back(Allele(allele_type, diff, alen, dlen, mlen, allele_ts, allele_tv));
            ts += allele_ts;
            tv += allele_tv;
            //NOTE(review): ins/del are overwritten per allele, so they
            //reflect only the last sequence allele processed - confirm.
            ins = dlen>0?1:0;
            del = dlen<0?1:0;
        }
    }

    if (type==VT_VNTR)
    {
        update_vntr_from_info_fields(h, v);
    }

    //additionally define MNPs by length of all alleles
    if (!(type&(VT_VNTR|VT_SV)) && type!=VT_REF)
    {
        if (homogeneous_length && rlen>1 && n_allele>1)
        {
            type |= VT_MNP;
        }
    }

    return type;
}
Example #8
0
/**
 * Constructor.
 *
 * Clears the record, stores the header/record pointers, rid, pos1 and
 * vtype, then fills in the type specific members:
 *   - biallelic VT_SNP   : beg1 and end1 are both the record position
 *   - biallelic VT_INDEL : dlen, len, exact and fuzzy flanks, the
 *                          enclosing beg1/end1 interval and the indel
 *                          sequence
 *   - VT_VNTR            : beg1/end1 one base outside the record and the
 *                          motif from the MOTIF INFO tag when present
 * Any other combination leaves the type specific members as set by clear().
 *
 * @h     - VCF header.
 * @v     - VCF record.
 * @vtype - variant type of the record.
 */
GenotypingRecord::GenotypingRecord(bcf_hdr_t *h, bcf1_t *v, int32_t vtype)
{
    clear();
    this->h = h;
    this->v = v;
    rid = bcf_get_rid(v);
    pos1 = bcf_get_pos1(v);
    this->vtype = vtype;
    int32_t n_allele = bcf_get_n_allele(v);

    if (vtype==VT_SNP && n_allele==2)
    {
        beg1 = bcf_get_pos1(v);
        end1 = beg1;
    }
    else if (vtype==VT_INDEL && n_allele==2)
    {
        char** alleles = bcf_get_allele(v);
        //signed arithmetic so that deletions yield a negative difference
        dlen = (int32_t) strlen(alleles[1]) - (int32_t) strlen(alleles[0]);
        len = abs(dlen);

        //exact flanks: both values are required, otherwise fall back to
        //the bases immediately outside the record
        int32_t *flanks = NULL;
        int32_t n = 0;
        if (bcf_get_info_int32(h, v, "FLANKS", &flanks, &n)>=2)
        {
            lend1 = flanks[0];
            rbeg1 = flanks[1];
        }
        else
        {
            lend1 = bcf_get_pos1(v) - 1;
            rbeg1 = bcf_get_end_pos1(v) + 1;
        }
        //free(NULL) is a no-op, so this also covers the not-found case
        free(flanks);

        //fuzzy flanks, same rules as above
        int32_t *fuzzy_flanks = NULL;
        n = 0;
        if (bcf_get_info_int32(h, v, "FZ_FLANKS", &fuzzy_flanks, &n)>=2)
        {
            fuzzy_lend1 = fuzzy_flanks[0];
            fuzzy_rbeg1 = fuzzy_flanks[1];
        }
        else
        {
            fuzzy_lend1 = bcf_get_pos1(v) - 1;
            fuzzy_rbeg1 = bcf_get_end_pos1(v) + 1;
        }
        free(fuzzy_flanks);

        //interval enclosing both flank pairs, padded by two bases
        beg1 = std::min(lend1-2, fuzzy_lend1-2);
        end1 = std::max(rbeg1+2, fuzzy_rbeg1+2);

        //TODO: construct alleles from the reference sequence (not implemented)

        //the indel sequence is the longer allele without its first base
        if (dlen>0)
        {
            indel.append(&alleles[1][1]);
        }
        else
        {
            indel.append(&alleles[0][1]);
        }
    }
    else if (vtype==VT_VNTR)
    {
        beg1 = bcf_get_pos1(v) - 1;
        end1 = bcf_get_end_pos1(v) + 1;

        char *motif = NULL;
        int32_t n = 0;

        if (bcf_get_info_string(h, v, "MOTIF", &motif, &n)>0)
        {
           this->motif.assign(motif);
        }
        free(motif);
    }
}