/*
 * Record, for one input VCF header, how each of its FILTER/INFO/FORMAT ids maps
 * onto the id of the same key in the merged header.
 *
 * curr_header    header of the input VCF being registered
 * input_vcf_idx  index of that input, used as the first coordinate of the LUT
 *
 * Every id of the input that is still defined must already be present in the
 * merged header; this is only verified through assert().
 */
void VariantHeaderMerger<fields_forward_LUT_ordering, fields_reverse_LUT_ordering, samples_forward_LUT_ordering, samples_reverse_LUT_ordering>::add_header_fields_mapping(bcf_hdr_t* curr_header, unsigned input_vcf_idx)
{
  assert(m_merged_vcf_header_ptr);
  auto* merged_header = m_merged_vcf_header_ptr.get();
  const int header_line_types[] = { BCF_HL_FLT, BCF_HL_INFO, BCF_HL_FMT };
  const int num_input_ids = curr_header->n[BCF_DT_ID];
  for(int input_idx=0; input_idx<num_input_ids; ++input_idx)
  {
    const char* field_name = curr_header->id[BCF_DT_ID][input_idx].key;
    for(const int line_type : header_line_types)
    {
      //The id is not (or no longer) defined for this header line type - nothing to map
      if(!bcf_hdr_idinfo_exists(curr_header, line_type, input_idx))
        continue;
      //The header record itself may have been removed
      if(bcf_hdr_id2hrec(curr_header, BCF_DT_ID, line_type, input_idx) == nullptr)
        continue;
      const auto merged_idx = bcf_hdr_id2int(merged_header, BCF_DT_ID, field_name);
      assert(merged_idx >= 0 && merged_idx < merged_header->n[BCF_DT_ID]);
      assert(bcf_hdr_idinfo_exists(merged_header, line_type, merged_idx));
      m_header_fields_LUT.add_input_merged_idx_pair(input_vcf_idx, input_idx, merged_idx);
    }
  }
}
/* Called once at startup, allows to initialize local variables. Return 1 to suppress VCF/BCF header from printing, 0 for standard VCF/BCF output and -1 on critical errors. */ int init(const char *opts, bcf_hdr_t *in, bcf_hdr_t *out) { int i, id; in_hdr = in; tags = config_get_list(opts ? opts : "tags=PL,GL,GT","tags", &ntags); for (i=0; i<ntags; i++) { if ( !strcmp("PL",tags[i]) ) { id = bcf_hdr_id2int(in_hdr,BCF_DT_ID,"PL"); if ( bcf_hdr_idinfo_exists(in_hdr,BCF_HL_FMT,id) ) { pl_type = bcf_hdr_id2type(in_hdr,BCF_HL_FMT,id); if ( pl_type!=BCF_HT_INT && pl_type!=BCF_HT_REAL ) { fprintf(stderr,"Expected numeric type of FORMAT/PL\n"); return -1; } handlers = (dosage_f*) realloc(handlers,(nhandlers+1)*sizeof(*handlers)); handlers[nhandlers++] = calc_dosage_PL; } } else if ( !strcmp("GL",tags[i]) ) { id = bcf_hdr_id2int(in_hdr,BCF_DT_ID,"GL"); if ( bcf_hdr_idinfo_exists(in_hdr,BCF_HL_FMT,id) ) { gl_type = bcf_hdr_id2type(in_hdr,BCF_HL_FMT,id); if ( gl_type!=BCF_HT_INT && gl_type!=BCF_HT_REAL ) { fprintf(stderr,"Expected numeric type of FORMAT/GL\n"); return -1; } handlers = (dosage_f*) realloc(handlers,(nhandlers+1)*sizeof(*handlers)); handlers[nhandlers++] = calc_dosage_GL; } } else if ( !strcmp("GT",tags[i]) ) { handlers = (dosage_f*) realloc(handlers,(nhandlers+1)*sizeof(*handlers)); handlers[nhandlers++] = calc_dosage_GT; } else { fprintf(stderr,"No handler for tag \"%s\"\n", tags[i]); return -1; } } free(tags[0]); free(tags); printf("#[1]CHROM\t[2]POS\t[3]REF\t[4]ALT"); for (i=0; i<bcf_hdr_nsamples(in_hdr); i++) printf("\t[%d]%s", i+5,in_hdr->samples[i]); printf("\n"); return 1; }
/*
    Initialise a token for a function call such as MAX(tag), MIN(tag) or AVG(tag).

    filter     the filter being built, provides the VCF header
    func_type  TOK_MAX, TOK_MIN or TOK_AVG
    str        in: points just after the opening bracket; out: advanced past
               the closing bracket
    tok        the token to fill; takes ownership of the allocated tag name

    The argument must be a numeric FORMAT tag defined in the header. Any
    problem is reported through error(). Returns 0.
*/
static int filters_init_func(filter_t *filter, int func_type, char **str, token_t *tok)
{
    // Find the closing bracket, everything before it is the tag name
    char *e = *str;
    while ( *e && *e!=')' ) e++;
    if ( !*e ) error("Could not parse the expression, right bracket not found [...%s]\n", *str);

    kstring_t tmp = {0,0,0};
    kputsn(*str, e-(*str), &tmp);
    (*str) += e-(*str)+1;

    tok->hdr_id = bcf_hdr_id2int(filter->hdr, BCF_DT_ID, tmp.s);
    if ( !bcf_hdr_idinfo_exists(filter->hdr,BCF_HL_FMT,tok->hdr_id) )
        error("[%s:%d %s] Error: the tag \"FORMAT/%s\" is not defined in the VCF header\n", __FILE__,__LINE__,__FUNCTION__,tmp.s);

    int fmt_type = bcf_hdr_id2type(filter->hdr,BCF_HL_FMT,tok->hdr_id);
    if ( fmt_type!=BCF_HT_INT && fmt_type!=BCF_HT_REAL )
        error("[%s:%d %s] Error: expected numeric tag with %s\n", __FILE__,__LINE__,__FUNCTION__,tmp.s);

    switch (func_type)
    {
        case TOK_MAX: tok->setter = filters_set_format_max; break;
        case TOK_MIN: tok->setter = filters_set_format_min; break;
        case TOK_AVG: tok->setter = filters_set_format_avg; break;
        default: error("[%s:%d %s] Error: unknown func_type: %d\n", __FILE__,__LINE__,__FUNCTION__,func_type);
    }
    // The function call evaluates to a value; the token keeps the tag name
    tok->tok_type = TOK_VAL;
    tok->tag = tmp.s;
    return 0;
}
/*
    Read FORMAT/BAF of one sample from args->fname and prepare everything
    needed for the analysis:
      - one BAF histogram (args->nbins bins over [0,1]) per chromosome, stored
        in args->dist and passed through init_dist()
      - the header of the output data file <output_dir>/dist.dat
      - the executable plotting script <output_dir>/dist.py

    Sites where BAF is missing, is not a single value, or lies outside of
    [0,1] are skipped. All failures are reported through error().
*/
static void init_data(args_t *args)
{
    bcf_srs_t *files = bcf_sr_init();
    if ( args->regions_list )
    {
        if ( bcf_sr_set_regions(files, args->regions_list, args->regions_is_file)<0 )
            error("Failed to read the regions: %s\n", args->regions_list);
    }
    if ( args->targets_list )
    {
        if ( bcf_sr_set_targets(files, args->targets_list, args->targets_is_file, 0)<0 )
            error("Failed to read the targets: %s\n", args->targets_list);
    }
    if ( !bcf_sr_add_reader(files, args->fname) ) error("Failed to open %s: %s\n", args->fname,bcf_sr_strerror(files->errnum));
    bcf_hdr_t *hdr = files->readers[0].header;
    if ( !args->sample )
    {
        if ( bcf_hdr_nsamples(hdr)>1 ) error("Missing the option -s, --sample\n");
        // Without this check hdr->samples[0] would be read from a sample-less header
        if ( bcf_hdr_nsamples(hdr)<1 ) error("No samples in the VCF: %s\n", args->fname);
        args->sample = hdr->samples[0];
    }
    else if ( bcf_hdr_id2int(hdr,BCF_DT_SAMPLE,args->sample)<0 ) error("No such sample: %s\n", args->sample);
    int ret = bcf_hdr_set_samples(hdr, args->sample, 0);
    if ( ret<0 ) error("Error setting the sample: %s\n", args->sample);

    if ( !bcf_hdr_idinfo_exists(hdr,BCF_HL_FMT,bcf_hdr_id2int(hdr,BCF_DT_ID,"BAF")) )
        error("The tag FORMAT/BAF is not present in the VCF: %s\n", args->fname);

    // x-coordinates of the histogram bins, shared by all chromosomes
    int i;
    args->xvals = (double*) calloc(args->nbins,sizeof(double));
    for (i=0; i<args->nbins; i++) args->xvals[i] = 1.0*i/(args->nbins-1);

    // collect BAF distributions for all chromosomes
    int idist = -1, nbaf = 0, prev_chr = -1;
    float *baf = NULL;
    while ( bcf_sr_next_line(files) )
    {
        bcf1_t *line = bcf_sr_get_line(files,0);
        if ( bcf_get_format_float(hdr,line,"BAF",&baf,&nbaf) != 1 ) continue;
        if ( bcf_float_is_missing(baf[0]) ) continue;
        // A value outside of [0,1] (or NaN) would index outside of the histogram
        if ( !(baf[0]>=0 && baf[0]<=1) ) continue;

        if ( prev_chr==-1 || prev_chr!=line->rid )
        {
            // new chromosome
            idist = args->ndist++;
            args->dist = (dist_t*) realloc(args->dist, sizeof(dist_t)*args->ndist);
            memset(&args->dist[idist],0,sizeof(dist_t));
            args->dist[idist].chr   = strdup(bcf_seqname(hdr,line));
            args->dist[idist].yvals = (double*) calloc(args->nbins,sizeof(double));
            args->dist[idist].xvals = args->xvals;
            args->dist[idist].nvals = args->nbins;
            prev_chr = line->rid;
        }
        int bin = baf[0]*(args->nbins-1);
        args->dist[idist].yvals[bin]++;   // the distribution
    }
    free(baf);
    bcf_sr_destroy(files);

    for (idist=0; idist<args->ndist; idist++)
    {
        #if 0
            int j;
            for (j=0; j<args->nbins; j++)
            {
                double x = args->dist[idist].xvals[j];
                args->dist[idist].yvals[j] = exp(-(x-0.5)*(x-0.5)/1e-3);
            }
        #endif
        init_dist(args, &args->dist[idist],args->verbose);
    }

    // Header of the data file read by the plotting script
    args->dat_fp = open_file(&args->dat_fname,"w","%s/dist.dat", args->output_dir);
    fprintf(args->dat_fp, "# This file was produced by: bcftools polysomy(%s+htslib-%s), the command line was:\n", bcftools_version(),hts_version());
    fprintf(args->dat_fp, "# \t bcftools %s ", args->argv[0]);
    for (i=1; i<args->argc; i++)
        fprintf(args->dat_fp, " %s",args->argv[i]);
    fprintf(args->dat_fp,"\n#\n");
    fprintf(args->dat_fp,"# DIST\t[2]Chrom\t[3]BAF\t[4]Normalized Count\n");
    fprintf(args->dat_fp,"# FIT\t[2]Goodness of Fit\t[3]iFrom\t[4]iTo\t[5]The Fitted Function\n");
    fprintf(args->dat_fp,"# CN\t[2]Chrom\t[3]Estimated Copy Number\t[4]Absolute fit deviation\n");

    char *fname = NULL;
    FILE *fp = open_file(&fname,"w","%s/dist.py", args->output_dir);
    //-------- matplotlib script --------------
    // The Python block structure depends on the leading spaces of each line
    fprintf(fp,
        "#!/usr/bin/env python\n"
        "#\n"
        "import matplotlib as mpl\n"
        "mpl.use('Agg')\n"
        "import matplotlib.pyplot as plt\n"
        "import csv,sys,argparse\n"
        "from math import exp\n"
        "\n"
        "outdir = '%s'\n"
        "\n"
        "def read_dat(dat,fit,cn):\n"
        "    csv.register_dialect('tab', delimiter='\t', quoting=csv.QUOTE_NONE)\n"
        "    with open(outdir+'/dist.dat', 'rb') as f:\n"
        "        reader = csv.reader(f, 'tab')\n"
        "        for row in reader:\n"
        "            if row[0][0]=='#': continue\n"
        "            type = row[0]\n"
        "            chr = row[1]\n"
        "            if type=='DIST':\n"
        "                if chr not in dat: dat[chr] = []\n"
        "                dat[chr].append(row)\n"
        "            elif type=='FIT':\n"
        "                if chr not in fit: fit[chr] = []\n"
        "                fit[chr].append(row)\n"
        "            elif type=='CN':\n"
        "                cn[chr] = row[2]\n"
        "\n"
        "def plot_dist(dat,fit,chr):\n"
        "    fig, ax = plt.subplots(1, 1, figsize=(7,5))\n"
        "    ax.plot([x[2] for x in dat[chr]],[x[3] for x in dat[chr]],'k-',label='Distribution')\n"
        "    if chr in fit:\n"
        "        for i in range(len(fit[chr])):\n"
        "            pfit = fit[chr][i]\n"
        "            exec('def xfit(x): return '+pfit[5])\n"
        "            istart = int(pfit[3])\n"
        "            iend = int(pfit[4])+1\n"
        "            vals = dat[chr][istart:iend]\n"
        "            args = {}\n"
        "            if i==0: args = {'label':'Target to Fit'}\n"
        "            ax.plot([x[2] for x in vals],[x[3] for x in vals],'r-',**args)\n"
        "            if i==0: args = {'label':'Best Fit'}\n"
        "            ax.plot([x[2] for x in vals],[xfit(float(x[2])) for x in vals],'g-',**args)\n"
        "    ax.set_title('BAF distribution, chr'+chr)\n"
        "    ax.set_xlabel('BAF')\n"
        "    ax.set_ylabel('Frequency')\n"
        "    ax.legend(loc='best',prop={'size':7},frameon=False)\n"
        "    plt.savefig(outdir+'/dist.chr'+chr+'.png')\n"
        "    plt.close()\n"
        "\n"
        "def plot_copy_number(cn):\n"
        "    fig, ax = plt.subplots(1, 1, figsize=(7,5))\n"
        "    xlabels = sorted(cn.keys())\n"
        "    xvals = range(len(xlabels))\n"
        "    yvals = [float(cn[x]) for x in xlabels]\n"
        "    ax.plot(xvals,yvals,'o',color='red')\n"
        "    for i in range(len(xvals)):\n"
        "        if yvals[i]==-1: ax.annotate('?', xy=(xvals[i],0.5),va='center',ha='center',color='red',fontweight='bold')\n"
        "    ax.tick_params(axis='both', which='major', labelsize=9)\n"
        "    ax.set_xticks(xvals)\n"
        "    ax.set_xticklabels(xlabels,rotation=45)\n"
        "    ax.set_xlim(-1,len(xlabels))\n"
        "    ax.set_ylim(0,5.0)\n"
        "    ax.set_yticks([1.0,2.0,3.0,4.0])\n"
        "    ax.set_xlabel('Chromosome')\n"
        "    ax.set_ylabel('Copy Number')\n"
        "    plt.savefig(outdir+'/copy-number.png')\n"
        "    plt.close()\n"
        "\n"
        "class myParser(argparse.ArgumentParser):\n"
        "    def error(self, message):\n"
        "        self.print_help()\n"
        "        sys.stderr.write('error: %%s\\n' %% message)\n"
        "        sys.exit(2)\n"
        "\n"
        "def main():\n"
        "    parser = myParser()\n"
        "    parser.add_argument('-a', '--all', action='store_true', help='Create all plots')\n"
        "    parser.add_argument('-c', '--copy-number', action='store_true', help='Create copy-number plot')\n"
        "    parser.add_argument('-d', '--distrib', metavar='CHR', help='Plot BAF distribution of a single chromosome')\n"
        "    args = parser.parse_args()\n"
        "    dat = {}; fit = {}; cn = {}\n"
        "    read_dat(dat,fit,cn)\n"
        "    if args.distrib!=None:\n"
        "        plot_dist(dat,fit,args.distrib)\n"
        "    if args.all:\n"
        "        for chr in dat: plot_dist(dat,fit,chr)\n"
        "        plot_copy_number(cn)\n"
        "    elif args.copy_number:\n"
        "        plot_copy_number(cn)\n"
        "    else:\n"
        "        for chr in dat: plot_dist(dat,fit,chr)\n"
        "\n"
        "if __name__ == '__main__':\n"
        "    main()\n",
        args->output_dir);
    //---------------------------------------
    chmod(fname, S_IWUSR|S_IRUSR|S_IRGRP|S_IROTH|S_IXUSR|S_IXGRP|S_IXOTH);
    free(fname);
    fclose(fp);
}
/*
    Open the input, create one output file per sample and parse the list of
    tags to keep.

    Output files are named <output_dir>/<base name><suffix>; white space in
    the base name is replaced with '_' and the suffix follows the output
    type. Samples without a base name are skipped. Each output receives a
    header with a single sample.

    args->keep_tags is a comma-separated list of INFO/TAG, FMT/TAG or
    FORMAT/TAG items, or the bare words INFO, FMT, FORMAT to keep all tags of
    that kind. The string is split in place. A tag given without a prefix
    inherits the kind of the preceding item. When nothing was selected,
    everything is kept.
*/
static void init_data(args_t *args)
{
    args->sr = bcf_sr_init();
    if ( args->region )
    {
        args->sr->require_index = 1;
        if ( bcf_sr_set_regions(args->sr, args->region, args->region_is_file)<0 ) error("Failed to read the regions: %s\n",args->region);
    }
    if ( args->target && bcf_sr_set_targets(args->sr, args->target, args->target_is_file, 0)<0 ) error("Failed to read the targets: %s\n",args->target);
    if ( !bcf_sr_add_reader(args->sr,args->fname) ) error("Error: %s\n", bcf_sr_strerror(args->sr->errnum));
    args->hdr_in  = bcf_sr_get_header(args->sr,0);
    args->hdr_out = bcf_hdr_dup(args->hdr_in);

    if ( args->filter_str )
        args->filter = filter_init(args->hdr_in, args->filter_str);

    mkdir_p("%s/",args->output_dir);

    int i, nsmpl = bcf_hdr_nsamples(args->hdr_in);
    if ( !nsmpl ) error("No samples to split: %s\n", args->fname);
    args->fh = (htsFile**)calloc(nsmpl,sizeof(*args->fh));
    args->bnames = set_file_base_names(args);
    kstring_t str = {0,0,0};
    for (i=0; i<nsmpl; i++)
    {
        if ( !args->bnames[i] ) continue;
        str.l = 0;
        kputs(args->output_dir, &str);
        // Do not look in front of the buffer when the directory name is empty
        if ( str.l && str.s[str.l-1] != '/' ) kputc('/', &str);
        size_t k, l = str.l;
        kputs(args->bnames[i], &str);
        // isspace() is only defined for unsigned char values and EOF
        for (k=l; k<str.l; k++) if ( isspace((unsigned char)str.s[k]) ) str.s[k] = '_';
        if ( args->output_type & FT_BCF ) kputs(".bcf", &str);
        else if ( args->output_type & FT_GZ ) kputs(".vcf.gz", &str);
        else kputs(".vcf", &str);
        args->fh[i] = hts_open(str.s, hts_bcf_wmode(args->output_type));
        if ( args->fh[i] == NULL ) error("Can't write to \"%s\": %s\n", str.s, strerror(errno));
        // The shared output header is rewritten to contain only the current sample
        bcf_hdr_nsamples(args->hdr_out) = 1;
        args->hdr_out->samples[0] = args->bnames[i];
        if ( bcf_hdr_write(args->fh[i], args->hdr_out)!=0 ) error("Failed to write the header to \"%s\"\n", str.s);
    }
    free(str.s);

    // parse tags
    int is_info = 0, is_fmt = 0;
    char *beg = args->keep_tags;
    while ( beg && *beg )
    {
        if ( !strncasecmp("INFO/",beg,5) ) { is_info = 1; is_fmt = 0; beg += 5; }
        else if ( !strcasecmp("INFO",beg) ) { args->keep_info = 1; break; }
        else if ( !strncasecmp("INFO,",beg,5) ) { args->keep_info = 1; beg += 5; continue; }
        else if ( !strncasecmp("FMT/",beg,4) ) { is_info = 0; is_fmt = 1; beg += 4; }
        else if ( !strncasecmp("FORMAT/",beg,7) ) { is_info = 0; is_fmt = 1; beg += 7; }
        else if ( !strcasecmp("FMT",beg) ) { args->keep_fmt = 1; break; }
        else if ( !strcasecmp("FORMAT",beg) ) { args->keep_fmt = 1; break; }
        else if ( !strncasecmp("FMT,",beg,4) ) { args->keep_fmt = 1; beg += 4; continue; }
        else if ( !strncasecmp("FORMAT,",beg,7) ) { args->keep_fmt = 1; beg += 7; continue; }

        // Terminate the current tag name in place and look it up in the header
        char *end = beg;
        while ( *end && *end!=',' ) end++;
        char tmp = *end;
        *end = 0;
        int id = bcf_hdr_id2int(args->hdr_in, BCF_DT_ID, beg);
        beg = tmp ? end + 1 : end;

        // Tags not defined in the header are ignored
        if ( is_info && bcf_hdr_idinfo_exists(args->hdr_in,BCF_HL_INFO,id) )
        {
            if ( id >= args->ninfo_tags ) args->ninfo_tags = id + 1;
            hts_expand0(uint8_t, args->ninfo_tags, args->minfo_tags, args->info_tags);
            args->info_tags[id] = 1;
        }
        if ( is_fmt && bcf_hdr_idinfo_exists(args->hdr_in,BCF_HL_FMT,id) )
        {
            if ( id >= args->nfmt_tags ) args->nfmt_tags = id + 1;
            hts_expand0(uint8_t, args->nfmt_tags, args->mfmt_tags, args->fmt_tags);
            args->fmt_tags[id] = 1;
        }
    }
    if ( !args->keep_info && !args->keep_fmt && !args->ninfo_tags && !args->nfmt_tags )
    {
        args->keep_info = args->keep_fmt = 1;
    }
}
// Parse filter expression and convert to reverse polish notation. Dijkstra's shunting-yard algorithm filter_t *filter_init(bcf_hdr_t *hdr, const char *str) { filter_t *filter = (filter_t *) calloc(1,sizeof(filter_t)); filter->str = strdup(str); filter->hdr = hdr; int nops = 0, mops = 0, *ops = NULL; // operators stack int nout = 0, mout = 0; // filter tokens, RPN token_t *out = NULL; char *tmp = filter->str; int last_op = -1; while ( *tmp ) { int len, ret; ret = filters_next_token(&tmp, &len); if ( ret==-1 ) error("Missing quotes in: %s\n", str); // fprintf(stderr,"token=[%c] .. [%s] %d\n", "x()[<=>]!|&+-*/Mm"[ret], tmp, len); // int i; for (i=0; i<nops; i++) fprintf(stderr," .%c.", "x()[<=>]!|&+-*/Mm"[ops[i]]); fprintf(stderr,"\n"); if ( ret==TOK_MAX || ret==TOK_MIN || ret==TOK_AVG ) { nout++; hts_expand0(token_t, nout, mout, out); filters_init_func(filter, ret, &tmp, &out[nout-1]); } else if ( ret==TOK_LFT ) // left bracket { nops++; hts_expand(int, nops, mops, ops); ops[nops-1] = ret; } else if ( ret==TOK_RGT ) // right bracket { while ( nops>0 && ops[nops-1]!=TOK_LFT ) { nout++; hts_expand0(token_t, nout, mout, out); out[nout-1].tok_type = ops[nops-1]; nops--; } if ( nops<=0 ) error("Could not parse: %s\n", str); nops--; } else if ( ret!=TOK_VAL ) // one of the operators { // detect unary minus: replace -value with -1*(value) if ( ret==TOK_SUB && last_op!=TOK_VAL && last_op!=TOK_RGT ) { nout++; hts_expand0(token_t, nout, mout, out); token_t *tok = &out[nout-1]; tok->tok_type = TOK_VAL; tok->hdr_id = -1; tok->pass = -1; tok->threshold = -1.0; ret = TOK_MULT; } else { while ( nops>0 && op_prec[ret] < op_prec[ops[nops-1]] ) { nout++; hts_expand0(token_t, nout, mout, out); out[nout-1].tok_type = ops[nops-1]; nops--; } } nops++; hts_expand(int, nops, mops, ops); ops[nops-1] = ret; } else if ( !len ) { if ( *tmp && !isspace(*tmp) ) error("Could not parse the expression: [%s]\n", str); break; // all tokens read } else // annotation name or filtering value { nout++; 
hts_expand0(token_t, nout, mout, out); filters_init1(filter, tmp, len, &out[nout-1]); tmp += len; } last_op = ret; } while ( nops>0 ) { if ( ops[nops-1]==TOK_LFT || ops[nops-1]==TOK_RGT ) error("Could not parse the expression: [%s]\n", filter->str); nout++; hts_expand0(token_t, nout, mout, out); out[nout-1].tok_type = ops[nops-1]; nops--; } // In the special cases of %TYPE and %FILTER the BCF header IDs are yet unknown. Walk through the // list of operators and convert the strings (e.g. "PASS") to BCF ids. The string value token must be // just before or after the %FILTER token and they must be followed with a comparison operator. // This code is fragile: improve me. int i; for (i=0; i<nout; i++) { if ( out[i].tok_type!=TOK_VAL ) continue; if ( !out[i].tag ) continue; if ( !strcmp(out[i].tag,"%TYPE") ) { if ( i+1==nout ) error("Could not parse the expression: %s\n", filter->str); int j = i+1; if ( out[j].tok_type==TOK_EQ || out[j].tok_type==TOK_NE ) j = i - 1; if ( out[j].tok_type!=TOK_VAL || !out[j].key ) error("[%s:%d %s] Could not parse the expression: %s\n", __FILE__,__LINE__,__FUNCTION__, filter->str); if ( !strcasecmp(out[j].key,"snp") || !strcasecmp(out[j].key,"snps") ) out[j].threshold = VCF_SNP; else if ( !strcasecmp(out[j].key,"indel") || !strcasecmp(out[j].key,"indels") ) out[j].threshold = VCF_INDEL; else if ( !strcasecmp(out[j].key,"mnp") || !strcasecmp(out[j].key,"mnps") ) out[j].threshold = VCF_MNP; else if ( !strcasecmp(out[j].key,"other") ) out[j].threshold = VCF_OTHER; else error("The type \"%s\" not recognised: %s\n", out[j].key, filter->str); out[j].tag = out[j].key; out[j].key = NULL; i = j; continue; } if ( !strcmp(out[i].tag,"%FILTER") ) { if ( i+1==nout ) error("Could not parse the expression: %s\n", filter->str); int j = i+1; if ( out[j].tok_type==TOK_EQ || out[j].tok_type==TOK_NE ) j = i - 1; if ( out[j].tok_type!=TOK_VAL || !out[j].key ) error("[%s:%d %s] Could not parse the expression, an unquoted string value perhaps? 
%s\n", __FILE__,__LINE__,__FUNCTION__, filter->str); if ( strcmp(".",out[j].key) ) { out[j].hdr_id = bcf_hdr_id2int(filter->hdr, BCF_DT_ID, out[j].key); if ( !bcf_hdr_idinfo_exists(filter->hdr,BCF_HL_FLT,out[j].hdr_id) ) error("The filter \"%s\" not present in the VCF header\n", out[j].key); } else out[j].hdr_id = -1; out[j].tag = out[j].key; out[j].key = NULL; out[i].hdr_id = out[j].hdr_id; i = j; continue; } } // filter_debug_print(out, nout); if ( mops ) free(ops); filter->filters = out; filter->nfilters = nout; filter->flt_stack = (token_t **)malloc(sizeof(token_t*)*nout); return filter; }
int main(int argc, char **argv) { int i, n; static struct option const long_opts[] = { {"out", required_argument, NULL, 1}, {"report", required_argument, NULL, 2}, {"dotasref", no_argument, NULL, 3}, {"help", no_argument, NULL, 0}, {"version", no_argument, NULL, 4}, {"export_uncov", no_argument, NULL, 5} }; bool help = FALSE; bool report_version = FALSE; while ((n = getopt_long(argc, argv, "1:2:304", long_opts, NULL)) >= 0) { switch (n) { case 1 : outfile = strdup(optarg); break; case 2 : report = strdup(optarg); break; case 3 : dotasref = TRUE; break; case 0 : help = TRUE; break; case 4 : report_version = TRUE; break; case 5 : export_uncover = TRUE; break; default : return 1; } if ( help ) return usage(); if ( report_version ) return show_version(); } n = argc - optind; if ( n > 1 ) errabort("only accept one input vcf"); if ( export_uncover == TRUE && outfile == FALSE) { warnings("export uncove region only used with option --out"); export_uncover = FALSE; } char * input; if ( n == 0 ) input = strdup("-"); else input = strdup(argv[optind]); htsFile * fp = read_vcf_file(input); enum htsExactFormat fmt = hts_get_format(fp)->format; if ( fmt != vcf && fmt != bcf ) errabort("This is not a VCF/BCF file : %s", input); bcf_hdr_t * hdr = bcf_hdr_read(fp); int n_samples = bcf_hdr_nsamples(hdr); if ( n_samples != 2 ) errabort("the input VCF/BCF file must contain only two samples! 
%d", n_samples); LOG("Using sample %s as ref ...", hdr->samples[0]); LOG("Using sample %s as test ...", hdr->samples[1]); uint32_t matrix[4][4] = { {0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0} }; bcf1_t * v = bcf_init1(); kstring_t str = { 0, 0, 0 }; uint32_t line = 0; htsFile *out = NULL; if ( outfile && !check_filename(outfile) ) out = hts_open(outfile, mode); if ( out != NULL ) bcf_hdr_write(out, hdr); while ( bcf_read1(fp, hdr, v) >= 0 ) { bcf_unpack(v, BCF_UN_STR|BCF_UN_FMT); int k; str.l = 0; int tag_id = bcf_hdr_id2int(hdr, BCF_DT_ID, "GT"); if ( !bcf_hdr_idinfo_exists(hdr, BCF_HL_FMT, tag_id) ) warnings("There is no 'GT' in the header!"); for ( i = 0; i < v->n_fmt; ++i ) if ( v->d.fmt[i].id == tag_id ) break; if ( i == v->n_fmt ) { vcf_format1(hdr, v, &str); LOG("There is no tag GT in this line : %s", str.s); continue; } corr_t xy[2] = { {-1, -2, -2}, {-1, -2, -2} }; bcf_fmt_t * fmt = &v->d.fmt[i]; for ( i = 0; i < 2; ++i ) { int corr = i; if ( fmt == NULL ) { if ( dotasref == TRUE ) xy[corr].alt = ALT_IS_REF; else xy[corr].alt = ALT_IS_UNC; continue; } int last = -2; uint8_t *d = (uint8_t*)((char*)fmt->p + fmt->size*i); for ( k = 0; k < fmt->n && d[k] != (uint8_t)bcf_int8_vector_end; ++k ) { int curr = d[k]>>1; if ( last != curr ) { if ( curr ) { if ( last == -2 ) xy[corr].alt = curr > 1 ? ALT_IS_HOM : ALT_IS_REF; else xy[corr].alt = ALT_IS_HET; } else { xy[corr].alt = dotasref == TRUE ? ALT_IS_REF : ALT_IS_UNC; } } else { if ( curr ) { xy[corr].alt = curr > 1 ? ALT_IS_HOM : ALT_IS_REF; } else { xy[corr].alt = dotasref == TRUE ? 
ALT_IS_REF : ALT_IS_UNC; } } if (last == -2 ) { xy[corr].min = xy[corr].max = curr; } else { if ( curr < xy[corr].min ) xy[corr].min = curr; else if ( curr > xy[corr].max ) xy[corr].max = curr; } last = curr; } } matrix[xy[0].alt][xy[1].alt]++; if ( xy[0].alt != xy[1].alt && out != NULL) { if ( xy[0].alt == ALT_IS_UNC || xy[1].alt == ALT_IS_UNC ) { if ( export_uncover == TRUE ) { str.l = 0; vcf_format1(hdr, v, &str); vcf_write(out, hdr, v); } } else { str.l = 0; vcf_format1(hdr, v, &str); vcf_write(out, hdr, v); } } if ( xy[0].alt == ALT_IS_HET && xy[1].alt == ALT_IS_HET && (xy[0].min != xy[1].min || xy[0].max != xy[1].max ) ) { bias++; matrix[ALT_IS_HET][ALT_IS_HET]--; if ( out != NULL ) { str.l = 0; vcf_format1(hdr, v, &str); vcf_write(out, hdr, v); } } line++; } if ( out ) hts_close(out); if ( str.m ) free(str.s); write_report(matrix, hdr); bcf_hdr_destroy(hdr); free(input); bcf_destroy1(v); if ( outfile ) free(outfile); if ( report ) free(report); if ( hts_close(fp) ) warnings("hts_close returned non-zero status: %s", input); return 0; }
static void reheader_bcf(args_t *args, int is_compressed) { htsFile *fp = hts_open(args->fname, "r"); if ( !fp ) error("Failed to open: %s\n", args->fname); bcf_hdr_t *hdr = bcf_hdr_read(fp); if ( !hdr ) error("Failed to read the header: %s\n", args->fname); kstring_t htxt = {0,0,0}; int hlen; htxt.s = bcf_hdr_fmt_text(hdr, 1, &hlen); htxt.l = hlen; int i, nsamples = 0; char **samples = NULL; if ( args->samples_fname ) samples = hts_readlines(args->samples_fname, &nsamples); if ( args->header_fname ) { free(htxt.s); htxt.s = NULL; htxt.l = htxt.m = 0; read_header_file(args->header_fname, &htxt); } if ( samples ) { set_samples(samples, nsamples, &htxt); for (i=0; i<nsamples; i++) free(samples[i]); free(samples); } bcf_hdr_t *hdr_out = bcf_hdr_init("r"); bcf_hdr_parse(hdr_out, htxt.s); if ( args->header_fname ) hdr_out = strip_header(hdr, hdr_out); // write the header and the body htsFile *fp_out = hts_open("-",is_compressed ? "wb" : "wbu"); bcf_hdr_write(fp_out, hdr_out); bcf1_t *rec = bcf_init(); while ( bcf_read(fp, hdr, rec)==0 ) { // sanity checking, this slows things down. Make it optional? 
bcf_unpack(rec, BCF_UN_ALL); if ( rec->rid >= hdr_out->n[BCF_DT_CTG] || strcmp(bcf_hdr_int2id(hdr,BCF_DT_CTG,rec->rid),bcf_hdr_int2id(hdr_out,BCF_DT_CTG,rec->rid)) ) error("The CHROM is not defined: \"%s\"\n", bcf_hdr_int2id(hdr,BCF_DT_CTG,rec->rid)); for (i=0; i<rec->d.n_flt; i++) { int id = rec->d.flt[i]; if ( id >= hdr_out->n[BCF_DT_ID] ) break; if ( !bcf_hdr_idinfo_exists(hdr_out,BCF_HL_FLT,id) ) break; if ( strcmp(hdr->id[BCF_DT_ID][id].key,hdr_out->id[BCF_DT_ID][id].key) ) error("FIXME: Broken FILTER ids: %s vs %s\n", hdr->id[BCF_DT_ID][id].key,hdr_out->id[BCF_DT_ID][id].key); } if ( i!=rec->d.n_flt ) error("The FILTER is not defined: \"%s\"\n", bcf_hdr_int2id(hdr,BCF_DT_ID,rec->d.flt[i])); for (i=0; i<rec->n_info; i++) { int id = rec->d.info[i].key; if ( id >= hdr_out->n[BCF_DT_ID] ) break; if ( !hdr_out->id[BCF_DT_ID][id].key ) break; if ( !bcf_hdr_idinfo_exists(hdr_out,BCF_HL_INFO,id) ) break; if ( strcmp(hdr->id[BCF_DT_ID][id].key,hdr_out->id[BCF_DT_ID][id].key) ) error("FIXME: Broken INFO ids: %s vs %s\n", hdr->id[BCF_DT_ID][id].key,hdr_out->id[BCF_DT_ID][id].key); } if ( i!=rec->n_info ) error("The INFO tag is not defined: \"%s\"\n", bcf_hdr_int2id(hdr,BCF_DT_ID,rec->d.info[i].key)); for (i=0; i<rec->n_fmt; i++) { int id = rec->d.fmt[i].id; if ( id >= hdr_out->n[BCF_DT_ID] ) break; if ( !hdr_out->id[BCF_DT_ID][id].key ) break; if ( !bcf_hdr_idinfo_exists(hdr_out,BCF_HL_FMT,id) ) break; if ( strcmp(hdr->id[BCF_DT_ID][id].key,hdr_out->id[BCF_DT_ID][id].key) ) error("FIXME: Broken FORMAT ids: %s vs %s\n", hdr->id[BCF_DT_ID][id].key,hdr_out->id[BCF_DT_ID][id].key); } if ( i!=rec->n_fmt ) error("The FORMAT tag is not defined: \"%s\"\n", bcf_hdr_int2id(hdr,BCF_DT_ID,rec->d.fmt[i].id)); bcf_write(fp_out,hdr_out,rec); } bcf_destroy(rec); free(htxt.s); hts_close(fp_out); hts_close(fp); bcf_hdr_destroy(hdr_out); bcf_hdr_destroy(hdr); }
/*
    Open the output file and prepare the header and the filtering expression.

    Depending on the options, up to three FILTER lines are appended to the
    header: the soft filter set by -i/-e (its name is either given by the
    user or, for "+", the first unused "Filter<N>"), SnpGap and IndelGap.
    The ids of the new filters are stored in args. All failures are fatal.
*/
static void init_data(args_t *args)
{
    args->out_fh = hts_open(args->output_fname,hts_bcf_wmode(args->output_type));
    if ( args->out_fh == NULL ) error("Can't write to \"%s\": %s\n", args->output_fname, strerror(errno));
    if ( args->n_threads ) hts_set_threads(args->out_fh, args->n_threads);

    args->hdr = args->files->readers[0].header;
    args->flt_pass = bcf_hdr_id2int(args->hdr,BCF_DT_ID,"PASS"); assert( !args->flt_pass );  // sanity check: required by BCF spec

    // -i or -e: append FILTER line
    if ( args->soft_filter && args->filter_logic )
    {
        kstring_t flt_name = {0,0,0};
        if ( strcmp(args->soft_filter,"+") )
            kputs(args->soft_filter, &flt_name);
        else
        {
            // Make up a filter name
            int i = 0, id = -1;
            do
            {
                // Start from an empty buffer, otherwise the candidates pile up
                // into names such as "Filter1Filter2"
                flt_name.l = 0;
                ksprintf(&flt_name,"Filter%d", ++i);
                id = bcf_hdr_id2int(args->hdr,BCF_DT_ID,flt_name.s);
            }
            while ( bcf_hdr_idinfo_exists(args->hdr,BCF_HL_FLT,id) );
        }
        // escape quotes
        kstring_t tmp = {0,0,0};
        char *t = args->filter_str;
        while ( *t )
        {
            if ( *t=='"' ) kputc('\\',&tmp);
            kputc(*t,&tmp);
            t++;
        }
        // The buffer stays unallocated for an empty expression
        const char *expr  = tmp.s ? tmp.s : "";
        const char *logic = args->filter_logic & FLT_INCLUDE ? "not true" : "true";
        int ret = bcf_hdr_printf(args->hdr, "##FILTER=<ID=%s,Description=\"Set if %s: %s\">", flt_name.s,logic,expr);
        if ( ret!=0 )
            error("Failed to append header line: ##FILTER=<ID=%s,Description=\"Set if %s: %s\">\n", flt_name.s,logic,expr);
        args->flt_fail = bcf_hdr_id2int(args->hdr,BCF_DT_ID,flt_name.s); assert( args->flt_fail>=0 );
        free(flt_name.s);
        free(tmp.s);
    }

    if ( args->snp_gap || args->indel_gap )
    {
        // The gap filters have fixed names, a user-supplied name cannot be honoured
        if ( !args->filter_logic && args->soft_filter && strcmp(args->soft_filter,"+") )
        {
            kstring_t tmp = {0,0,0};
            if ( args->snp_gap ) kputs("\"SnpGap\"", &tmp);
            if ( args->indel_gap )
            {
                if ( tmp.s ) kputs(" and ", &tmp);
                kputs("\"IndelGap\"", &tmp);
            }
            fprintf(stderr,"Warning: using %s filter name instead of \"%s\"\n", tmp.s,args->soft_filter);
            free(tmp.s);
        }

        rbuf_init(&args->rbuf, 64);
        args->rbuf_lines = (bcf1_t**) calloc(args->rbuf.m, sizeof(bcf1_t*));
        if ( args->snp_gap )
        {
            if ( bcf_hdr_printf(args->hdr, "##FILTER=<ID=SnpGap,Description=\"SNP within %d bp of an indel\">", args->snp_gap)!=0 )
                error("Failed to append header line: ##FILTER=<ID=SnpGap,Description=\"SNP within %d bp of an indel\">\n", args->snp_gap);
            args->SnpGap_id = bcf_hdr_id2int(args->hdr, BCF_DT_ID, "SnpGap");
            assert( args->SnpGap_id>=0 );
        }
        if ( args->indel_gap )
        {
            if ( bcf_hdr_printf(args->hdr, "##FILTER=<ID=IndelGap,Description=\"Indel within %d bp of an indel\">", args->indel_gap)!=0 )
                error("Failed to append header line: ##FILTER=<ID=IndelGap,Description=\"Indel within %d bp of an indel\">\n", args->indel_gap);
            args->IndelGap_id = bcf_hdr_id2int(args->hdr, BCF_DT_ID, "IndelGap");
            assert( args->IndelGap_id>=0 );
        }
    }

    if (args->record_cmd_line) bcf_hdr_append_version(args->hdr, args->argc, args->argv, "bcftools_filter");

    // The expression is parsed last so that it sees the FILTER lines added above
    if ( args->filter_str )
        args->filter = filter_init(args->hdr, args->filter_str);
}
/*
    Prepare the header, the sample subset, the PL lookup table and the HMM,
    and print the header of the output.

    The header is restricted to the query sample, or, when allele
    frequencies are to be estimated from a list of samples (args->estimate_AF
    names a file), to these samples plus the query sample. With "-" as
    args->estimate_AF all samples are kept. All failures are fatal.
*/
static void init_data(args_t *args)
{
    args->prev_rid = args->skip_rid = -1;
    args->hdr = args->files->readers[0].header;

    // Must come first: the default sample below is read from the header
    if ( !bcf_hdr_nsamples(args->hdr) ) error("No samples in the VCF?\n");
    if ( !args->sample )
    {
        if ( bcf_hdr_nsamples(args->hdr)>1 ) error("Missing the option -s, --sample\n");
        args->sample = strdup(args->hdr->samples[0]);
    }

    // Set samples
    kstring_t str = {0,0,0};
    if ( args->estimate_AF && strcmp("-",args->estimate_AF) )
    {
        int i, n;
        char **smpls = hts_readlist(args->estimate_AF, 1, &n);
        if ( !smpls ) error("Could not read the list of samples: %s\n", args->estimate_AF);

        // Make sure the query sample is included
        for (i=0; i<n; i++)
            if ( !strcmp(args->sample,smpls[i]) ) break;

        // Add the query sample if not present
        if ( i==n ) kputs(args->sample, &str);

        for (i=0; i<n; i++)
        {
            if ( str.l ) kputc(',', &str);
            kputs(smpls[i], &str);
            free(smpls[i]);
        }
        free(smpls);
    }
    else if ( !args->estimate_AF )
        kputs(args->sample, &str);

    if ( str.l )
    {
        int ret = bcf_hdr_set_samples(args->hdr, str.s, 0);
        if ( ret<0 ) error("Error parsing the list of samples: %s\n", str.s);
        else if ( ret>0 ) error("The %d-th sample not found in the VCF\n", ret);
    }

    if ( args->af_tag )
        if ( !bcf_hdr_idinfo_exists(args->hdr,BCF_HL_INFO,bcf_hdr_id2int(args->hdr,BCF_DT_ID,args->af_tag)) )
            error("No such INFO tag in the VCF: %s\n", args->af_tag);

    args->nsmpl = bcf_hdr_nsamples(args->hdr);
    args->ismpl = bcf_hdr_id2int(args->hdr, BCF_DT_SAMPLE, args->sample);
    // When all samples are kept the query sample has not been verified yet
    if ( args->ismpl<0 ) error("No such sample: %s\n", args->sample);
    free(str.s);

    // Phred-scaled likelihood to probability
    int i;
    for (i=0; i<256; i++) args->pl2p[i] = pow(10., -i/10.);

    // Init transition matrix and HMM
    double tprob[4];
    MAT(tprob,2,STATE_HW,STATE_HW) = 1 - args->t2AZ;
    MAT(tprob,2,STATE_HW,STATE_AZ) = args->t2HW;
    MAT(tprob,2,STATE_AZ,STATE_HW) = args->t2AZ;
    MAT(tprob,2,STATE_AZ,STATE_AZ) = 1 - args->t2HW;

    if ( args->genmap_fname )
    {
        args->hmm = hmm_init(2, tprob, 0);
        hmm_set_tprob_func(args->hmm, set_tprob_genmap, args);
    }
    else if ( args->rec_rate > 0 )
    {
        args->hmm = hmm_init(2, tprob, 0);
        hmm_set_tprob_func(args->hmm, set_tprob_recrate, args);
    }
    else
        args->hmm = hmm_init(2, tprob, 10000);

    // print header
    printf("# This file was produced by: bcftools roh(%s+htslib-%s)\n", bcftools_version(),hts_version());
    printf("# The command line was:\tbcftools %s", args->argv[0]);
    for (i=1; i<args->argc; i++)
        printf(" %s",args->argv[i]);
    printf("\n#\n");
    printf("# [1]Chromosome\t[2]Position\t[3]State (0:HW, 1:AZ)\t[4]Quality\n");
}
int beds_database_add(struct beds_options *opts, const char *fname, char *columns) { if ( opts->n_files == opts->m_files ) { opts->m_files = opts->m_files == 0 ? 2 : opts->m_files +2; opts->files = (struct beds_anno_file*)realloc(opts->files, opts->m_files*sizeof(struct beds_anno_file)); } struct beds_anno_file *file = &opts->files[opts->n_files]; memset(file, 0, sizeof(struct beds_anno_file)); file->id = opts->n_files; file->fname = strdup(fname); file->fp = hts_open(fname, "r"); if (file->fp == NULL) error("Failed to open %s : %s", fname, strerror(errno)); // int n; file->idx = tbx_index_load(fname); if ( file->idx == NULL) error("Failed to load index of %s.", fname); opts->n_files++; file->last_id = -1; file->last_start = -1; file->last_end = -1; kstring_t string = KSTRING_INIT; int no_columns = 0; int i; if ( columns == NULL && file->no_such_chrom == 0) { warnings("No columns string specified for %s. Will annotate all tags in this data.", fname); file->no_such_chrom = 1; no_columns = 1; } else { int *splits = NULL; kputs(columns, &string); int nfields; splits = ksplit(&string, ',', &nfields); file->m_cols = nfields; file->cols = (struct anno_col*)malloc(sizeof(struct anno_col) * file->m_cols); for ( i = 0; i < nfields; ++i ) { char *ss = string.s + splits[i]; struct anno_col *col = &file->cols[file->n_cols]; col->icol = i; col->replace = REPLACE_MISSING; if (*ss == '+') { col->replace = REPLACE_MISSING; ss++; } else if ( *ss == '-' ) { col->replace = REPLACE_EXISTING; ss++; } if (ss[0] == '\0') continue; if ( strncmp(ss, "INFO/", 5) == 0) ss += 5; col->hdr_key = strdup(ss); col->icol = -1; // debug_print("%s, %d", col->hdr_key, file->n_cols); file->n_cols++; } string.l = 0; } while (1) { string.l =0; if ( hts_getline(file->fp, KS_SEP_LINE, &string) < 0 ) break; // only accept header line in the beginning for file if ( string.s[0] != '#' ) break; if ( strncmp(string.s, "##INFO=", 7) == 0) { char *ss = string.s + 11; char *se = ss; while (se && *se != ',') se++; 
struct anno_col *col = NULL; // if no column string specified, init all header lines if ( no_columns ) { if ( file->n_cols == file->m_cols ) { file->m_cols = file->m_cols == 0 ? 2 : file->m_cols + 2; file->cols = (struct anno_col *) realloc(file->cols, file->m_cols*sizeof(struct anno_col)); } col = &file->cols[file->n_cols++]; col->icol = -1; col->hdr_key = strndup(ss, se-ss+1); col->hdr_key[se-ss] = '\0'; } else { for ( i = 0; i < file->n_cols; ++i ) { if ( strncmp(file->cols[i].hdr_key, ss, se-ss) == 0) break; } // if header line is not set in the column string, skip if ( i == file->n_cols ) continue; col = &file->cols[i]; } // specify setter functions here col->setter.bed = beds_setter_info_string; bcf_hdr_append(opts->hdr_out, string.s); bcf_hdr_sync(opts->hdr_out); int hdr_id = bcf_hdr_id2int(opts->hdr_out, BCF_DT_ID,col->hdr_key); assert ( bcf_hdr_idinfo_exists(opts->hdr_out, BCF_HL_INFO, hdr_id) ); } string.l = 0; // set column number for each col if ( strncasecmp(string.s, "#chr", 4) == 0) { int nfields; int *splits = ksplit(&string, '\t', &nfields); if (nfields < 4) { fprintf(stderr, "[error] Bad header of bed database : %s. 
n_fields : %d, %s", fname, nfields, string.s); fprintf(stderr, "[notice] this error usually happened because the header line is seperated by spaces but not tab!"); exit(1); } int k; for ( k = 3; k < nfields; ++k ) { char *ss = string.s + splits[k]; for (i = 0; i < file->n_cols; ++i ) { struct anno_col *col = &file->cols[i]; if ( strcmp(col->hdr_key, ss) == 0) break; } // if name line specify more names than column string or header, skip if ( i == file->n_cols ) continue; struct anno_col *col = &file->cols[i]; col->icol = k; } } } for ( i = 0; i < file->n_cols; ++i ) { struct anno_col *col = &file->cols[i]; if ( col->hdr_key && col->icol == -1 ) error("No column %s found in bed database : %s", col->hdr_key, fname); int hdr_id = bcf_hdr_id2int(opts->hdr_out, BCF_DT_ID, col->hdr_key); assert(hdr_id>-1); col->number = bcf_hdr_id2length(opts->hdr_out, BCF_HL_INFO, hdr_id); if ( col->number == BCF_VL_A || col->number == BCF_VL_R || col->number == BCF_VL_G) error("Only support fixed INFO number for bed database. %s", col->hdr_key); col->ifile = file->id; } if ( string.m ) free(string.s); if ( opts->beds_is_inited == 0 ) opts->beds_is_inited = 1; return 0; }
// only if annotation database is VCF/BCF file, header_in has values or else header_in == NULL anno_col_t *init_columns(const char *rules, bcf_hdr_t *header_in, bcf_hdr_t *header_out, int *ncols, enum anno_type type) { assert(rules != NULL); if (type == anno_is_vcf && header_in == NULL) { error("Inconsistent file type!"); } char *ss = (char*)rules, *se = ss; int nc = 0; anno_col_t *cols = NULL; kstring_t tmp = KSTRING_INIT; kstring_t str = KSTRING_INIT; int i = -1; while (*ss) { if ( *se && *se!=',' ) { se++; continue; } int replace = REPLACE_ALL; if ( *ss=='+') { replace = REPLACE_MISSING; ss++; } else if (*ss=='-') { replace = REPLACE_EXISTING; ss++; } i++; str.l = 0; kputsn(ss, se-ss, &str); if ( !str.s[0] ) { warnings("Empty tag in %s", rules); } else if ( !strcasecmp("CHROM", str.s) || !strcasecmp("POS", str.s) || !strcasecmp("FROM", str.s) || !strcasecmp("TO", str.s) || !strcasecmp("REF", str.s) || !strcasecmp("ALT", str.s) || !strcasecmp("FILTER", str.s) || !strcasecmp("QUAL", str.s)) { warnings("Skip tag %s", str.s); } else if ( !strcasecmp("ID", str.s) ) { nc++; cols = (struct anno_col*) realloc(cols, sizeof(struct anno_col)* (nc)); struct anno_col *col = &cols[nc-1]; col->icol = i; col->replace = replace; col->setter = type == anno_is_vcf ? vcf_setter_id : setter_id; col->hdr_key = strdup(str.s); } else if (!strcasecmp("INFO", str.s) || !strcasecmp("FORMAT", str.s) ) { error("do not support annotate all INFO,FORMAT fields. todo INFO/TAG instead\n"); } else if (!strncasecmp("FORMAT/", str.s, 7) || !strncasecmp("FMT/", str.s, 4)) { char *key = str.s + (!strncasecmp("FMT", str.s, 4) ? 
4 : 7); if (!strcasecmp("GT", key)) error("It is not allowed to change GT tag."); int hdr_id = bcf_hdr_id2int(header_out, BCF_DT_ID, str.s); if ( !bcf_hdr_idinfo_exists(header_out, BCF_HL_FMT, hdr_id) ) { if ( type == anno_is_vcf ) { bcf_hrec_t *hrec = bcf_hdr_get_hrec(header_in, BCF_HL_FMT, "ID", str.s, NULL); if ( !hrec ) error("The tag \"%s\" is not defined in header: %s\n", str.s, rules); tmp.l = 0; bcf_hrec_format(hrec, &tmp); bcf_hdr_append(header_out, tmp.s); bcf_hdr_sync(header_out); hdr_id = bcf_hdr_id2int(header_out, BCF_DT_ID, str.s); assert( bcf_hdr_idinfo_exists(header_out, BCF_HL_FMT, hdr_id) ); } else { error("The tag \"%s\" is not defined in header: %s\n", str.s, rules); } } //int hdr_id = bcf_hdr_id2int(header_out, BCF_DT_ID, key); nc++; cols = (struct anno_col*) realloc(cols, sizeof(struct anno_col)*(nc)); struct anno_col *col = &cols[nc-1]; col->icol = -1; col->replace = replace; col->hdr_key = strdup(key); switch ( bcf_hdr_id2type(header_out, BCF_HL_FMT, hdr_id) ) { case BCF_HT_INT: col->setter = type == anno_is_vcf ? vcf_setter_format_int : setter_format_int; break; case BCF_HT_REAL: col->setter = type == anno_is_vcf ? vcf_setter_format_real : setter_format_real; break; case BCF_HT_STR: col->setter = type == anno_is_vcf ? 
vcf_setter_format_str : setter_format_str; break; default : error("The type of %s not recognised (%d)\n", str.s, bcf_hdr_id2type(header_out, BCF_HL_FMT, hdr_id)); } } else if ( !strncasecmp("INFO/", str.s, 5) ) { memmove(str.s, str.s+5, str.l-4); str.l -= 4; int hdr_id = bcf_hdr_id2int(header_out, BCF_DT_ID, str.s); if ( !bcf_hdr_idinfo_exists(header_out, BCF_HL_INFO, hdr_id) ) { if ( type == anno_is_vcf ) { bcf_hrec_t *hrec = bcf_hdr_get_hrec(header_in, BCF_HL_INFO, "ID", str.s, NULL); if ( !hrec ) error("The tag \"%s\" is not defined in header: %s\n", str.s, rules); tmp.l = 0; bcf_hrec_format(hrec, &tmp); bcf_hdr_append(header_out, tmp.s); bcf_hdr_sync(header_out); hdr_id = bcf_hdr_id2int(header_out, BCF_DT_ID, str.s); assert( bcf_hdr_idinfo_exists(header_out, BCF_HL_INFO, hdr_id) ); } else { error("The tag \"%s\" is not defined in header: %s\n", str.s, rules); } } nc++; cols = (struct anno_col*) realloc(cols, sizeof(struct anno_col)*(nc)); struct anno_col *col = &cols[nc-1]; col->icol = i; col->replace = replace; col->hdr_key = strdup(str.s); col->number = bcf_hdr_id2length(header_out, BCF_HL_INFO, hdr_id); switch ( bcf_hdr_id2type(header_out, BCF_HL_INFO, hdr_id) ) { case BCF_HT_FLAG: col->setter = type == anno_is_vcf ? vcf_setter_info_flag : setter_info_flag; break; case BCF_HT_INT: col->setter = type == anno_is_vcf ? vcf_setter_info_int : setter_info_int; break; case BCF_HT_REAL: col->setter = type == anno_is_vcf ? vcf_setter_info_real : setter_info_real; break; case BCF_HT_STR: col->setter = type == anno_is_vcf ? vcf_setter_info_str : setter_info_str; break; default: error("The type of %s not recognised (%d)\n", str.s, bcf_hdr_id2type(header_out, BCF_HL_INFO, hdr_id)); } } if ( !*se ) break; ss = ++se; } *ncols = nc; if (str.m) free(str.s); if (tmp.m) free(tmp.s); return cols; }