示例#1
0
/**
 * Build the alt allele for one call line, align it against the reference
 * allele chr[ref_start, ref_end) and pass the alignment to align_biallelic()
 * to be broken into variants and printed.
 *
 * The alt allele is `line`, optionally extended with bases borrowed from the
 * flanks, and reverse complemented when the call is not on the forward strand.
 *
 * @param line        allele sequence (not required to be NUL terminated,
 *                    only the first `linelen` characters are used)
 * @param linelen     number of characters in `line`
 * @param flank5p     5' flank sequence
 * @param flank5p_len number of characters in `flank5p`
 * @param flank3p     3' flank sequence
 * @param flank3p_len number of characters in `flank3p`
 * @param cpy_flnk_5p how many characters to copy from the end of the 5' flank
 *                    to the start of the allele; must be <= flank5p_len
 * @param cpy_flnk_3p how many characters to copy from the start of the 3' flank
 *                    to the end of the allele; must be <= flank3p_len
 * @param chr         reference chromosome the call maps to
 * @param ref_start   start of the ref allele within chr->seq.b
 * @param ref_end     end of the ref allele, must be >= ref_start
 * @param fw_strand   if false the assembled allele is reverse complemented
 *                    before alignment
 * @param info        passed through unchanged to align_biallelic()
 * @param genotypes   passed through unchanged to align_biallelic()
 * @param tmpbuf      scratch buffer; reset and overwritten whenever the allele
 *                    has to be assembled or reverse complemented
 * @param fout        output stream, passed through to align_biallelic()
 *
 * Uses the file-level aligner state (nw_scoring_allele, nw_aligner, aln) and
 * increments the num_nw_allele counter.
 */
static void align_entry_allele(const char *line, size_t linelen,
                               const char *flank5p, size_t flank5p_len,
                               const char *flank3p, size_t flank3p_len,
                               size_t cpy_flnk_5p, size_t cpy_flnk_3p,
                               const read_t *chr,
                               size_t ref_start, size_t ref_end,
                               bool fw_strand,
                               const char *info, const char **genotypes,
                               StrBuf *tmpbuf, FILE *fout)
{
  // Keeps the parameter "used" in case ctx_assert() expands to nothing
  (void)flank3p_len;

  ctx_assert(ref_start <= ref_end);
  // Copying more than a flank holds would read outside the flank buffers:
  // before flank5p for the 5' side, past the end of flank3p for the 3' side
  ctx_assert(cpy_flnk_5p <= flank5p_len);
  ctx_assert(cpy_flnk_3p <= flank3p_len);

  // Ref allele
  const char *ref_allele = chr->seq.b + ref_start;
  size_t ref_len = ref_end-ref_start;

  // Construct alt allele
  const char *alt_allele;
  size_t alt_len;

  if(cpy_flnk_5p + cpy_flnk_3p == 0 && fw_strand)
  {
    // Nothing to add or flip: align the input line directly, no copy needed
    alt_allele = line;
    alt_len = linelen;
  }
  else
  {
    // Assemble <tail of 5' flank><line><head of 3' flank> in the scratch buffer
    strbuf_reset(tmpbuf);
    strbuf_append_strn(tmpbuf, flank5p+flank5p_len-cpy_flnk_5p, cpy_flnk_5p);
    strbuf_append_strn(tmpbuf, line, linelen);
    strbuf_append_strn(tmpbuf, flank3p, cpy_flnk_3p);

    // Reverse complement in place (source and destination are the same buffer)
    if(!fw_strand) dna_revcomp_str(tmpbuf->b, tmpbuf->b, tmpbuf->end);

    alt_allele = tmpbuf->b;
    alt_len = tmpbuf->end;
  }

  // Align ref allele against alt allele
  needleman_wunsch_align2(ref_allele, alt_allele, ref_len, alt_len,
                          &nw_scoring_allele, nw_aligner, aln);
  num_nw_allele++;

  // Break into variants and print VCF
  align_biallelic(aln->result_a, aln->result_b,
                  chr, ref_start,
                  info, genotypes, fout);
}
示例#2
0
/**
 * Decompose one aligned call: align each of its alt lines against the
 * reference allele and pass the alignment to align_biallelic().
 *
 * Counters in dc->stats are updated along the way:
 *  - ncalls                      every call seen
 *  - ncalls_mapped               calls that have a chromosome
 *  - ncalls_ref_allele_too_long  calls skipped because the ref allele is
 *                                longer than max_line_len
 *  - nlines                      lines of all calls that get past the checks
 *                                above
 *  - nlines_too_long             lines longer than max_line_len (skipped)
 *  - nlines_match_ref            lines equal to the ref allele, ignoring case
 *                                (skipped)
 *  - nlines_mapped               lines that were aligned and decomposed
 *
 * @param dc             decomposer state: scoring, aligner, alignment, stats
 * @param call           call to decompose; ignored if call->chrom is NULL
 * @param max_line_len   longest ref / alt sequence that will be aligned
 * @param max_allele_len passed through unchanged to align_biallelic()
 */
void acall_decompose(CallDecomp *dc, const AlignedCall *call,
                     size_t max_line_len, size_t max_allele_len)
{
  dc->stats.ncalls++;

  // Unmapped call: nothing to align against
  if(call->chrom == NULL) return;

  dc->stats.ncalls_mapped++;

  ctx_assert2(call->start <= call->end, "%u .. %u", call->start, call->end);

  const read_t *chrom = call->chrom;
  const size_t ref_len = call->end - call->start;
  const char *ref_allele = chrom->seq.b + call->start;

  if(ref_len > max_line_len)
  {
    // Reference allele too long, can't align
    dc->stats.ncalls_ref_allele_too_long++;
    return;
  }

  dc->stats.nlines += call->n_lines;

  for(size_t line_idx = 0; line_idx < call->n_lines; line_idx++)
  {
    const StrBuf *line = &call->lines[line_idx];
    ctx_assert(strlen(line->b) == line->end);

    // Cheap rejections first: line too long to align...
    if(line->end > max_line_len)
    {
      dc->stats.nlines_too_long++;
      continue;
    }

    // ...or identical to the reference (case insensitive), so no variants
    if(line->end == ref_len &&
       strncasecmp(ref_allele, line->b, ref_len) == 0)
    {
      dc->stats.nlines_match_ref++;
      continue;
    }

    // Align ref against this line, then split the alignment into variants
    needleman_wunsch_align2(ref_allele, line->b, ref_len, line->end,
                            dc->scoring, dc->nw_aligner, dc->aln);

    // Genotypes for this line start at offset line_idx * n_samples in gts
    align_biallelic(dc->aln->result_a, dc->aln->result_b, chrom,
                    call->gts + line_idx * call->n_samples, call->n_samples,
                    dc, call, max_allele_len);

    dc->stats.nlines_mapped++;
  }
}