Exemplo n.º 1
0
// Write one VCF record to fout and increment num_vars_printed.
// Nothing is written (and the counter is untouched) when alt holds more than
// max_allele_len non-gap characters.
// @param vcf_pos is 1-based
// @param prev_base is -1 if SNP otherwise previous base
// @param aligned_len number of chars of alt scanned for gaps; also the length
//                    handed to print_vcf_allele for both ref and alt
// @param info text for the INFO column, "." is printed when NULL
// @param genotypes one string per sample (num_samples entries); when NULL no
//                  sample columns are printed
static void print_vcf_entry(const char *chrom_name, size_t vcf_pos, int prev_base,
                            const char *ref, const char *alt,
                            size_t aligned_len,
                            const char *info,
                            const char **genotypes,
                            FILE *fout)
{
  size_t k, n_alt_bases = 0;

  // Gap characters ('-') in alt do not count towards the allele length
  for(k = 0; k < aligned_len; k++) {
    if(alt[k] != '-') n_alt_bases++;
  }
  if(n_alt_bases > max_allele_len) return;

  // CHROM POS ID
  fprintf(fout, "%s\t%zu\tvar%zu\t", chrom_name, vcf_pos, num_vars_printed);

  // REF ALT
  print_vcf_allele(prev_base, ref, aligned_len, fout);
  fputc('\t', fout);
  print_vcf_allele(prev_base, alt, aligned_len, fout);

  // QUAL FILTER INFO FORMAT
  fprintf(fout, "\t.\tPASS\t%s\tGT", info ? info : ".");

  // One tab-separated genotype column per sample
  if(genotypes != NULL) {
    for(k = 0; k < num_samples; k++)
      fprintf(fout, "\t%s", genotypes[k]);
  }

  fputc('\n', fout);
  num_vars_printed++;
}
Exemplo n.º 2
0
// Assemble one VCF text record in dc->sbuf, parse it with vcf_parse() into
// dc->v and write it to dc->vcffh with bcf_write(). Dies if either call
// returns non-zero.
// Every call bumps dc->stats.nvars; records that are written also bump
// dc->stats.nvars_printed. If alt holds more than max_allele_len non-gap
// characters the record is skipped and dc->stats.nallele_too_long is bumped.
// @param vcf_pos is 0-based (written out as vcf_pos+1)
// @param prev_base is -1 if SNP otherwise previous base
// @param next_base is -1 unless indel at position 0
// @param len number of chars of alt scanned for gaps; also the length handed
//            to print_vcf_allele for both ref and alt
// @param gts one entry per sample: non-zero is printed as '1', zero as '.'
static void print_vcf_entry(size_t vcf_pos, int8_t prev_base, int8_t next_base,
                            const char *ref, const char *alt, size_t len,
                            const uint8_t *gts, size_t nsamples,
                            CallDecomp *dc, const AlignedCall *call,
                            size_t max_allele_len)
{
  StrBuf *line = &dc->sbuf;
  size_t s, n_alt_bases = 0;

  dc->stats.nvars++;
  strbuf_reset(line);

  // Gap characters ('-') in alt do not count towards the allele length
  for(s = 0; s < len; s++) {
    if(alt[s] != '-') n_alt_bases++;
  }
  if(n_alt_bases > max_allele_len) {
    dc->stats.nallele_too_long++;
    return;
  }

  // CHROM POS ID
  strbuf_append_str(line, call->chrom->name.b);
  strbuf_append_char(line, '\t');
  strbuf_append_ulong(line, vcf_pos+1);
  strbuf_append_str(line, "\t.\t");

  // REF ALT
  print_vcf_allele(ref, len, prev_base, next_base, line);
  strbuf_append_char(line, '\t');
  print_vcf_allele(alt, len, prev_base, next_base, line);

  // QUAL FILTER INFO FORMAT
  strbuf_append_str(line, "\t.\tPASS\t");
  strbuf_append_str(line, call->info.b ? call->info.b : ".");
  strbuf_append_str(line, "\tGT");

  // One genotype column per sample
  for(s = 0; s < nsamples; s++) {
    strbuf_append_char(line, '\t');
    strbuf_append_char(line, gts[s] ? '1' : '.');
  }

  strbuf_append_char(line, '\n');

  // Lend our buffer to a kstring_t for the duration of the parse
  kstring_t ks = {
    .l = line->end,
    .m = line->size,
    .s = line->b
  };

  if(vcf_parse(&ks, dc->vcfhdr, dc->v) != 0)
    die("Cannot construct VCF entry: %s", line->b);

  if(bcf_write(dc->vcffh, dc->vcfhdr, dc->v) != 0)
    die("Cannot write VCF entry [nsamples: %zu vs %zu]", nsamples, (size_t)bcf_hdr_nsamples(dc->vcfhdr));

  // Take the buffer pointer and capacity back from the kstring_t
  line->b = ks.s;
  line->size = ks.m;

  dc->stats.nvars_printed++;
}

// `ref` and `alt` are aligned alleles - should both be same length strings
// of 'ACGT-'
// Scans up to the end of `ref` and returns the index of the first position
// at which the two strings differ, or -1 if no such position is found.
static int align_get_start(const char *ref, const char *alt)
{
  size_t pos;

  for(pos = 0; ref[pos] != '\0'; pos++) {
    if(ref[pos] != alt[pos])
      return (int)pos;
  }

  return -1;
}

// `ref` and `alt` are aligned alleles - should both be same length strings
// of 'ACGT-'
// Returns the first position at which `ref` and `alt` hold the same
// character. If there is no such position the scan stops at the end of the
// shorter string and that index is returned (i.e. strlen(ref) for the
// expected equal-length input).
static int align_get_end(const char *ref, const char *alt)
{
  int i = 0;
  // Stop at alt's terminator as well as ref's: without the alt[i] check a
  // shorter alt would be read past its end ('\0' never equals a ref base).
  while(ref[i] && alt[i] && ref[i] != alt[i]) i++;
  return i;
}