Esempio n. 1
0
/**
 * Walk along a pairwise ref/alt alignment and pass each region delimited by
 * align_get_start() / align_get_end() to print_vcf_entry().
 *
 * The reference coordinate starts at call->start and is used directly as an
 * index into chrom->seq.b (no +1 conversion is applied in this function).
 *
 * @param ref    aligned reference string (presumably '-' marks a gap column;
 *               confirm against align_get_nbases())
 * @param alt    aligned alternate string, advanced in lock-step with ref
 * @param chrom  sequence from which a padding base is read when required
 * @param gts             forwarded unchanged to print_vcf_entry()
 * @param nsamples        forwarded unchanged to print_vcf_entry()
 * @param dc              forwarded unchanged to print_vcf_entry()
 * @param call            supplies the starting coordinate (call->start) and is
 *                        forwarded to print_vcf_entry()
 * @param max_allele_len  forwarded unchanged to print_vcf_entry()
 *
 * Nothing is returned. A region that is not a one-base-for-one-base change is
 * only printed if a positive padding base was obtained for it.
 */
static void align_biallelic(const char *ref, const char *alt,
                            const read_t *chrom,
                            const uint8_t *gts, size_t nsamples,
                            CallDecomp *dc, const AlignedCall *call,
                            size_t max_allele_len)
{
  size_t ref_pos = call->start;

  for(;;)
  {
    // Offset of the next region to report; a negative value ends the walk
    int32_t offset = align_get_start(ref, alt);
    if(offset < 0) break;

    // Skip the leading columns. This assumes columns where ref and alt agree
    // never hold a gap in ref, so each one consumes one reference base.
    ref_pos += offset;
    ref += offset;
    alt += offset;

    // Number of alignment columns in this region
    int32_t ncols = align_get_end(ref, alt);

    size_t ref_nbases = align_get_nbases(ref, ncols);
    size_t alt_nbases = align_get_nbases(alt, ncols);
    size_t ref_end = ref_pos + ref_nbases;
    bool is_snp = (ref_nbases == 1 && alt_nbases == 1);

    // Anything other than a single-base substitution gets a padding base:
    // the base before the region when one exists, otherwise (region starts
    // at position 0) the base just after it, if that is still inside chrom.
    size_t vcf_pos = ref_pos;
    int8_t prev_base = -1;
    int8_t next_base = -1;

    if(!is_snp)
    {
      if(ref_pos > 0) {
        vcf_pos = ref_pos - 1;
        prev_base = chrom->seq.b[vcf_pos];
      }
      else if(ref_end < chrom->seq.end) {
        next_base = chrom->seq.b[ref_end];
      }
    }

    // Regions that needed padding but did not get a positive base are dropped
    bool printable = is_snp || prev_base > 0 || next_base > 0;

    if(printable)
    {
      print_vcf_entry(vcf_pos, prev_base, next_base, ref, alt, ncols,
                      gts, nsamples, dc, call, max_allele_len);
    }

    // Step past the region just handled
    ref_pos = ref_end;
    ref += ncols;
    alt += ncols;
  }
}
Esempio n. 2
0
/**
 * Walk along a pairwise ref/alt alignment and pass each region delimited by
 * align_get_start() / align_get_end() to print_vcf_entry().
 *
 * @param ref       aligned reference string
 * @param alt       aligned alternate string, advanced in lock-step with ref
 * @param chr       chromosome; chr->name.b is passed to print_vcf_entry() and
 *                  chr->seq.b supplies the padding base
 * @param ref_pos   0-based reference coordinate of the first column of ref
 * @param info      forwarded unchanged to print_vcf_entry() (per the original
 *                  note: extra text for the info field, may be NULL)
 * @param genotypes forwarded unchanged to print_vcf_entry() (per the original
 *                  note: strings for the genotype columns, may be NULL)
 * @param fout      forwarded unchanged to print_vcf_entry()
 */
static void align_biallelic(const char *ref, const char *alt,
                            const read_t *chr, size_t ref_pos,
                            const char *info, const char **genotypes,
                            FILE *fout)
{
  for(;;)
  {
    // Offset of the next region to report; a negative value ends the walk
    int offset = align_get_start(ref, alt);
    if(offset < 0) break;

    // Skip the leading columns. This assumes columns where ref and alt agree
    // never hold a gap in ref, so each one consumes one reference base.
    ref_pos += offset;
    ref += offset;
    alt += offset;

    // Number of alignment columns in this region
    int ncols = align_get_end(ref, alt);

    size_t ref_allele_len = align_get_len(ref, ncols);
    size_t alt_allele_len = align_get_len(alt, ncols);
    bool is_snp = (ref_allele_len == 1 && alt_allele_len == 1);

    int vcf_pos = ref_pos + 1; // 1-based coordinate of the region start
    int prev_base = -1;        // -1 means no padding base

    if(!is_snp)
    {
      // Report from the preceding base, which is prepended as padding.
      // NOTE(review): at ref_pos == 0 there is no preceding base, so 'N' is
      // used and the reported position becomes 0 -- confirm this is intended.
      if(ref_pos > 0) prev_base = chr->seq.b[ref_pos-1];
      else prev_base = 'N';
      vcf_pos--;
    }

    print_vcf_entry(chr->name.b, vcf_pos, prev_base,
                    ref, alt, ncols,
                    info, genotypes, fout);

    // Step past the region just handled
    ref_pos += ref_allele_len;
    ref += ncols;
    alt += ncols;
  }
}