Example #1
0
/**
 * Gets a sorted string representation of the alleles of a variant.
 */
void bcf_alleles2string_sorted(bcf_hdr_t *h, bcf1_t *v, kstring_t *var)
{
    bcf_unpack(v, BCF_UN_STR);
    var->l = 0;
    
    if (v->n_allele==2)
    {
        kputs(bcf_get_alt(v, 0), var);
        kputc(',', var);
        kputs(bcf_get_alt(v, 1), var);
    }
    else
    {
        char** allele = bcf_get_allele(v);
        char** temp = (char**) malloc((bcf_get_n_allele(v)-1)*sizeof(char*));
        for (int32_t i=1; i<v->n_allele; ++i)
        {
            temp[i-1] = allele[i];
        }
        
        std::qsort(temp, bcf_get_n_allele(v)-1, sizeof(char*), cmpstr);
        
        kputs(bcf_get_alt(v, 0), var);
        for (int32_t i=0; i<v->n_allele-1; ++i)
        {
            kputc(',', var);
            kputs(temp[i], var);
        }
        
        free(temp);
    }
}
/**
 * Extract reference sequence region for motif discovery in a fuzzy fashion.
 */
void CandidateRegionExtractor::extract_regions_by_fuzzy_alignment(bcf_hdr_t* h, bcf1_t* v,  Variant& variant)
{
    if (debug)
    {
        if (debug) std::cerr << "********************************************\n";
        std::cerr << "EXTRACTIING REGION BY FUZZY ALIGNMENT\n\n";
    }

    VNTR& vntr = variant.vntr;
    const char* chrom = bcf_get_chrom(h, v);

    int32_t min_beg1 = bcf_get_pos1(v);
    int32_t max_end1 = min_beg1;

    //merge candidate search region
    for (size_t i=1; i<bcf_get_n_allele(v); ++i)
    {
        std::string ref(bcf_get_alt(v, 0));
        std::string alt(bcf_get_alt(v, i));
        int32_t pos1 = bcf_get_pos1(v);

        trim(pos1, ref, alt);

        if (debug)
        {
            std::cerr << "indel fragment : " << (ref.size()<alt.size()? alt : ref) << "\n";
            std::cerr << "               : " << ref << ":" << alt << "\n";
        }

        min_beg1 = fuzzy_left_align(chrom, pos1, ref, alt, 3);
        max_end1 = fuzzy_right_align(chrom, pos1 + ref.size() - 1, ref, alt, 3);

        int32_t seq_len;
        char* seq = faidx_fetch_seq(fai, chrom, min_beg1-1, max_end1-1, &seq_len);
        if (debug)
        {
            std::cerr << "FUZZY REGION " << min_beg1 << "-" << max_end1 << " (" << max_end1-min_beg1+1 <<") " << "\n";
            std::cerr << "             " << seq << "\n";
        }

        if (seq_len) free(seq);
    }

    int32_t seq_len;
    char* seq = faidx_fetch_seq(fai, chrom, min_beg1-1, max_end1-1, &seq_len);

    if (debug)
    {
        std::cerr << "FINAL FUZZY REGION " << min_beg1 << "-" << max_end1 << " (" << max_end1-min_beg1+1 <<") " << "\n";
        std::cerr << "                   " << seq << "\n";
    }

    vntr.exact_repeat_tract = seq;
    vntr.exact_rbeg1 = min_beg1;

    if (seq_len) free(seq);
}
/**
 * Checks if a vntr is a homopolymer.
 */
bool CandidateRegionExtractor::is_homopolymer(bcf_hdr_t* h, bcf1_t* v)
{
    bool is_homopolymer = false;
    uint32_t ref_len = strlen(bcf_get_ref(v));
    for (size_t i=1; i<bcf_get_n_allele(v); ++i)
    {
        std::string ref(bcf_get_alt(v, 0));
        std::string alt(bcf_get_alt(v, i));
        int32_t pos1 = bcf_get_pos1(v);
    }

    return is_homopolymer;
}
Example #4
0
/**
 * Gets a string representation of a variant.
 */
void bcf_variant2string(bcf_hdr_t *h, bcf1_t *v, kstring_t *var)
{
    bcf_unpack(v, BCF_UN_STR);
    var->l = 0;
    kputs(bcf_get_chrom(h, v), var);
    kputc(':', var);
    kputw(bcf_get_pos1(v), var);
    kputc(':', var);
    for (int32_t i=0; i<v->n_allele; ++i)
    {
        if (i) kputc(',', var);
        kputs(bcf_get_alt(v, i), var);
    }
}
Example #5
0
File: variant.cpp Project: atks/vt
/**
 * Gets a string representation of the variant.
 */
std::string Variant::get_variant_string()
{
    kstring_t var = {0,0,0};
    bcf_unpack(v, BCF_UN_STR);
    var.l = 0;
    kputs(bcf_get_chrom(h, v), &var);
    kputc(':', &var);
    kputw(bcf_get_pos1(v), &var);
    kputc(':', &var);
    for (size_t i=0; i<bcf_get_n_allele(v); ++i)
    {
        if (i) kputc('/', &var);
        kputs(bcf_get_alt(v, i), &var);
    }

    std::string str(var.s);

    if (var.m) free(var.s);

    return str;
}
/**
 * Extract reference sequence region for motif discovery.
 *
 * The input is a VCF record that contains an indel.
 * 
 * If the the indel has multiple alleles, it will examine all
 * alleles.
 *
 * todo: is might be a good idea to combine this step with motif detection
 *       since there seems to be a need to have an iterative process here
 *       to ensure a good candidate motif is chosen. *  
 */
void CandidateRegionExtractor::extract_regions_by_exact_alignment(bcf_hdr_t* h, bcf1_t* v, Variant& variant)
{
    if (debug)
    {
        if (debug) std::cerr << "********************************************\n";
        std::cerr << "EXTRACTIING REGION BY EXACT LEFT AND RIGHT ALIGNMENT\n\n";
    }

    VNTR& vntr = variant.vntr;
    const char* chrom = bcf_get_chrom(h, v);

    int32_t min_beg1 = bcf_get_pos1(v);
    int32_t max_end1 = min_beg1;

    if (debug)
    {
       bcf_print_liten(h, v);
    }

    //merge candidate search region
    for (size_t i=1; i<bcf_get_n_allele(v); ++i)
    {
        std::string ref(bcf_get_alt(v, 0));
        std::string alt(bcf_get_alt(v, i));
        int32_t pos1 = bcf_get_pos1(v);

        //this prevents introduction of flanks that do not harbour the repeat unit
        trim(pos1, ref, alt);

        int32_t end1 = pos1 + ref.size() - 1;
        right_align(chrom, end1, ref, alt);

        int32_t beg1 = end1 - ref.size() + 1;
        left_align(chrom, beg1, ref, alt);

        min_beg1 = beg1<min_beg1 ? beg1 : min_beg1;
        max_end1 = end1>max_end1 ? end1 : max_end1;

        int32_t seq_len;
        char* seq = faidx_fetch_seq(fai, chrom, min_beg1-1, max_end1-1, &seq_len);

        if (debug)
        {
            std::cerr << "EXACT REGION " << min_beg1 << "-" << max_end1 << " (" << max_end1-min_beg1+1 <<") from " << pos1 << ":" << ref << ":" << alt << "\n";
            std::cerr << "             " << seq << "\n";
        }

        if (seq_len) free(seq);
    }

    int32_t seq_len;
    char* seq = faidx_fetch_seq(fai, chrom, min_beg1-1, max_end1-1, &seq_len);

    if (debug)
    {
        std::cerr << "FINAL EXACT REGION " << min_beg1 << "-" << max_end1 << " (" << max_end1-min_beg1+1 <<") " << "\n";
        std::cerr << "                   " << seq << "\n";
    }

    vntr.exact_repeat_tract = seq;
    vntr.rid = bcf_get_rid(v);
    vntr.exact_rbeg1 = min_beg1;
    vntr.exact_rend1 = max_end1;
    
    if (seq_len) free(seq);
}