/**
 * Align a single alt allele against its reference span and hand the
 * resulting alignment to align_biallelic() for output.
 *
 * The alt allele is `line`, optionally padded with flank sequence on either
 * side, and reverse-complemented when the call is not on the forward strand.
 * `tmpbuf` is only touched when padding or reverse-complementing is needed.
 *
 * @param line        allele sequence
 * @param linelen     number of characters in `line`
 * @param flank5p     5' flank sequence
 * @param flank5p_len number of characters in `flank5p`
 * @param flank3p     3' flank sequence
 * @param flank3p_len number of characters in `flank3p` (currently unused)
 * @param cpy_flnk_5p how many characters to copy from the end of the 5' flank
 *                    onto the start of the allele
 * @param cpy_flnk_3p how many characters to copy from the start of the
 *                    3' flank onto the end of the allele
 * @param chr         reference sequence the allele is aligned against
 * @param ref_start   start offset of the reference allele within `chr`
 * @param ref_end     end offset of the reference allele (must be >= ref_start)
 * @param fw_strand   if false the constructed allele is reverse-complemented
 * @param info        forwarded unchanged to align_biallelic()
 * @param genotypes   forwarded unchanged to align_biallelic()
 * @param tmpbuf      scratch buffer used to build the alt allele
 * @param fout        forwarded unchanged to align_biallelic()
 */
static void align_entry_allele(const char *line, size_t linelen,
                               const char *flank5p, size_t flank5p_len,
                               const char *flank3p, size_t flank3p_len,
                               size_t cpy_flnk_5p, size_t cpy_flnk_3p,
                               const read_t *chr,
                               size_t ref_start, size_t ref_end,
                               bool fw_strand,
                               const char *info, const char **genotypes,
                               StrBuf *tmpbuf, FILE *fout)
{
  (void)flank3p_len;
  ctx_assert(ref_start <= ref_end);

  // Reference side of the alignment
  size_t ref_len = ref_end - ref_start;
  const char *ref_allele = chr->seq.b + ref_start;

  // Alt side: by default the line is used as-is, without copying
  const char *alt_allele = line;
  size_t alt_len = linelen;

  // A copy is needed if flank bases are to be added or the strand flipped
  bool must_rebuild = (cpy_flnk_5p + cpy_flnk_3p != 0) || !fw_strand;

  if(must_rebuild)
  {
    const char *flank5p_tail = flank5p + flank5p_len - cpy_flnk_5p;

    strbuf_reset(tmpbuf);
    strbuf_append_strn(tmpbuf, flank5p_tail, cpy_flnk_5p);
    strbuf_append_strn(tmpbuf, line, linelen);
    strbuf_append_strn(tmpbuf, flank3p, cpy_flnk_3p);

    // Reverse complement in place (source and destination are the same buffer)
    if(!fw_strand) {
      dna_revcomp_str(tmpbuf->b, tmpbuf->b, tmpbuf->end);
    }

    alt_allele = tmpbuf->b;
    alt_len = tmpbuf->end;
  }

  // Align reference allele against alt allele
  needleman_wunsch_align2(ref_allele, alt_allele, ref_len, alt_len,
                          &nw_scoring_allele, nw_aligner, aln);
  num_nw_allele++;

  // Split the alignment into individual variants and print them
  align_biallelic(aln->result_a, aln->result_b, chr, ref_start,
                  info, genotypes, fout);
}
/**
 * Decompose an aligned call into variants, one alt line at a time.
 *
 * Every call is counted in `dc->stats.ncalls`. Calls without a chromosome
 * (`call->chrom == NULL`) are not processed further. Calls whose reference
 * span is longer than `max_line_len` are counted as too long and skipped.
 * For the remaining calls each line is either:
 *   - counted as too long (longer than `max_line_len`),
 *   - counted as matching the reference (same length, case-insensitive match),
 *   - or aligned against the reference allele and passed to align_biallelic().
 *
 * @param dc             decomposer state: scoring, aligner, alignment buffer
 *                       and statistics counters
 * @param call           the call to decompose
 * @param max_line_len   longest ref/alt sequence that will be aligned
 * @param max_allele_len forwarded unchanged to align_biallelic()
 */
void acall_decompose(CallDecomp *dc, const AlignedCall *call,
                     size_t max_line_len, size_t max_allele_len)
{
  dc->stats.ncalls++;

  // Unmapped call: nothing to decompose
  if(call->chrom == NULL) {
    return;
  }

  dc->stats.ncalls_mapped++;

  const read_t *chrom = call->chrom;
  const char *ref_allele = chrom->seq.b + call->start;
  size_t ref_len = call->end - call->start;

  ctx_assert2(call->start <= call->end, "%u .. %u", call->start, call->end);

  // Reference allele too long to align
  if(ref_len > max_line_len) {
    dc->stats.ncalls_ref_allele_too_long++;
    return;
  }

  dc->stats.nlines += call->n_lines;

  for(size_t idx = 0; idx < call->n_lines; idx++)
  {
    const StrBuf *line = &call->lines[idx];
    ctx_assert(strlen(line->b) == line->end);

    // Cheap rejections before running the aligner
    if(line->end > max_line_len) {
      dc->stats.nlines_too_long++;
      continue;
    }

    if(ref_len == line->end &&
       strncasecmp(ref_allele, line->b, ref_len) == 0)
    {
      dc->stats.nlines_match_ref++;
      continue;
    }

    // Line differs from the reference: align, then split into variants
    needleman_wunsch_align2(ref_allele, line->b, ref_len, line->end,
                            dc->scoring, dc->nw_aligner, dc->aln);

    align_biallelic(dc->aln->result_a, dc->aln->result_b, chrom,
                    call->gts+idx*call->n_samples, call->n_samples,
                    dc, call, max_allele_len);

    dc->stats.nlines_mapped++;
  }
}