// @param vcf_pos is 1-based // @param prev_base is -1 if SNP otherwise previous base static void print_vcf_entry(const char *chrom_name, size_t vcf_pos, int prev_base, const char *ref, const char *alt, size_t aligned_len, const char *info, const char **genotypes, FILE *fout) { // Check actual allele length size_t i, alt_bases = 0; for(i = 0; i < aligned_len; i++) alt_bases += (alt[i] != '-'); if(alt_bases > max_allele_len) return; // CHROM POS ID REF ALT QUAL FILTER INFO fprintf(fout, "%s\t%zu\tvar%zu\t", chrom_name, vcf_pos, num_vars_printed); print_vcf_allele(prev_base, ref, aligned_len, fout); fputc('\t', fout); print_vcf_allele(prev_base, alt, aligned_len, fout); fputs("\t.\tPASS\t", fout); if(info) fputs(info, fout); else fputc('.', fout); fputs("\tGT", fout); // Print genotypes if(genotypes) { for(i = 0; i < num_samples; i++) { fputc('\t', fout); fputs(genotypes[i], fout); } } fputc('\n', fout); num_vars_printed++; }
// @param vcf_pos is 0-based // @param prev_base is -1 if SNP otherwise previous base // @param next_base is -1 unless indel at position 0 static void print_vcf_entry(size_t vcf_pos, int8_t prev_base, int8_t next_base, const char *ref, const char *alt, size_t len, const uint8_t *gts, size_t nsamples, CallDecomp *dc, const AlignedCall *call, size_t max_allele_len) { dc->stats.nvars++; StrBuf *sbuf = &dc->sbuf; strbuf_reset(sbuf); // Check actual allele length size_t i, alt_bases = 0; for(i = 0; i < len; i++) alt_bases += (alt[i] != '-'); if(alt_bases > max_allele_len) { dc->stats.nallele_too_long++; return; } // CHROM POS ID REF ALT QUAL FILTER INFO strbuf_append_str(sbuf, call->chrom->name.b); strbuf_append_char(sbuf, '\t'); strbuf_append_ulong(sbuf, vcf_pos+1); strbuf_append_str(sbuf, "\t.\t"); print_vcf_allele(ref, len, prev_base, next_base, sbuf); strbuf_append_char(sbuf, '\t'); print_vcf_allele(alt, len, prev_base, next_base, sbuf); strbuf_append_str(sbuf, "\t.\tPASS\t"); strbuf_append_str(sbuf, call->info.b ? call->info.b : "."); strbuf_append_str(sbuf, "\tGT"); // Print genotypes for(i = 0; i < nsamples; i++) { strbuf_append_char(sbuf, '\t'); strbuf_append_char(sbuf, gts[i] ? '1' : '.'); } strbuf_append_char(sbuf, '\n'); // fprintf(stderr, " prev_base:%i next_base:%i info:%s\n", prev_base, next_base, call->info.b); // fprintf(stderr, "%s [%zu vs %zu]\n", sbuf->b, sbuf->end, strlen(sbuf->b)); kstring_t ks = {.l = sbuf->end, .m = sbuf->size, .s = sbuf->b}; if(vcf_parse(&ks, dc->vcfhdr, dc->v) != 0) die("Cannot construct VCF entry: %s", sbuf->b); if(bcf_write(dc->vcffh, dc->vcfhdr, dc->v) != 0) die("Cannot write VCF entry [nsamples: %zu vs %zu]", nsamples, (size_t)bcf_hdr_nsamples(dc->vcfhdr)); // Move back into our string buffer sbuf->b = ks.s; sbuf->size = ks.m; dc->stats.nvars_printed++; } // `ref` and `alt` are aligned alleles - should both be same length strings // of 'ACGT-' // return first mismatch position or -1 static int align_get_start(const char *ref, const char *alt) { const char *start = ref; while(*ref) { if(*ref != *alt) return (ref - start); ref++; alt++; } return -1; } // `ref` and `alt` are aligned alleles - should both be same length strings // of 'ACGT-' // return first matching position static int align_get_end(const char *ref, const char *alt) { int i = 0; while(ref[i] && ref[i] != alt[i]) i++; return i; }