Exemplo n.º 1
0
int main_vcfgtcheck(int argc, char *argv[])
{
	int c;
	args_t *args = (args_t*) calloc(1,sizeof(args_t));
    args->files  = bcf_sr_init();
	args->argc   = argc; args->argv = argv; set_cwd(args);
    char *regions = NULL, *targets = NULL;

	static struct option loptions[] = 
	{
		{"GTs-only",1,0,'G'},
		{"all-sites",0,0,'a'},
		{"homs-only",0,0,'H'},
		{"help",0,0,'h'},
		{"genotypes",1,0,'g'},
		{"plot",1,0,'p'},
		{"target-sample",1,0,'S'},
		{"query-sample",1,0,'s'},
        {"regions",1,0,'r'},
        {"targets",1,0,'t'},
		{0,0,0,0}
	};
	while ((c = getopt_long(argc, argv, "hg:p:s:S:Hr:at:G:",loptions,NULL)) >= 0) {
		switch (c) {
			case 'G': args->no_PLs = atoi(optarg); break;
			case 'a': args->all_sites = 1; break;
			case 'H': args->hom_only = 1; break;
			case 'g': args->gt_fname = optarg; break;
			case 'p': args->plot = optarg; break;
			case 'S': args->target_sample = optarg; break;
			case 's': args->query_sample = optarg; break;
            case 'r': regions = optarg; break;
            case 't': targets = optarg; break;
			case 'h': 
			case '?': usage();
			default: error("Unknown argument: %s\n", optarg);
		}
	}
    if ( argc==optind || argc>optind+1 )  usage();  // none or too many files given
    if ( !args->gt_fname ) args->cross_check = 1;   // no genotype file, run in cross-check mode
    else args->files->require_index = 1;
    if ( regions && bcf_sr_set_regions(args->files, regions)<0 ) error("Failed to read the regions: %s\n", regions);
    if ( targets && bcf_sr_set_targets(args->files, targets,0)<0 ) error("Failed to read the targets: %s\n", targets);
    if ( !bcf_sr_add_reader(args->files, argv[optind]) ) error("Failed to open or the file not indexed: %s\n", argv[optind]);
    if ( args->gt_fname && !bcf_sr_add_reader(args->files, args->gt_fname) ) error("Failed to open or the file not indexed: %s\n", args->gt_fname);
    args->files->collapse = COLLAPSE_SNPS|COLLAPSE_INDELS;
    if ( args->plot ) args->plot = init_prefix(args->plot);
    init_data(args);
    if ( args->cross_check )
        cross_check_gts(args);
    else
        check_gt(args);
    destroy_data(args);
	bcf_sr_destroy(args->files);
    if (args->plot) free(args->plot);
	free(args);
	return 0;
}
Exemplo n.º 2
0
static void init_data(args_t *args)
{
    args->aux.srs = bcf_sr_init();

    // Open files for input and output, initialize structures
    if ( args->targets )
    {
        if ( bcf_sr_set_targets(args->aux.srs, args->targets, args->targets_is_file, args->aux.flag&CALL_CONSTR_ALLELES ? 3 : 0)<0 )
            error("Failed to read the targets: %s\n", args->targets);

        if ( args->aux.flag&CALL_CONSTR_ALLELES && args->flag&CF_INS_MISSED )
        {
            args->aux.srs->targets->missed_reg_handler = print_missed_line;
            args->aux.srs->targets->missed_reg_data = args;
        }
    }
    if ( args->regions )
    {
        if ( bcf_sr_set_regions(args->aux.srs, args->regions, args->regions_is_file)<0 )
            error("Failed to read the targets: %s\n", args->regions);
    }

    int i;
    if ( !bcf_sr_add_reader(args->aux.srs, args->bcf_fname) ) error("Failed to open: %s\n", args->bcf_fname);

    if ( args->nsamples && args->nsamples != bcf_hdr_nsamples(args->aux.srs->readers[0].header) )
    {
        args->samples_map = (int *) malloc(sizeof(int)*args->nsamples);
        args->aux.hdr = bcf_hdr_subset(args->aux.srs->readers[0].header, args->nsamples, args->samples, args->samples_map);
        for (i=0; i<args->nsamples; i++)
            if ( args->samples_map[i]<0 ) error("No such sample: %s\n", args->samples[i]);
        if ( !bcf_hdr_nsamples(args->aux.hdr) ) error("No matching sample found\n");
    }
    else
    {
        args->aux.hdr = bcf_hdr_dup(args->aux.srs->readers[0].header);
        for (i=0; i<args->nsamples; i++)
            if ( bcf_hdr_id2int(args->aux.hdr,BCF_DT_SAMPLE,args->samples[i])<0 )
                error("No such sample: %s\n", args->samples[i]);
    }

    // Reorder ploidy and family indexes to match mpileup's output and exclude samples which are not available
    if ( args->aux.ploidy )
    {
        for (i=0; i<args->aux.nfams; i++)
        {
            int j;
            for (j=0; j<3; j++)
            {
                int k = bcf_hdr_id2int(args->aux.hdr, BCF_DT_SAMPLE, args->samples[ args->aux.fams[i].sample[j] ]);
                if ( k<0 ) error("No such sample: %s\n", args->samples[ args->aux.fams[i].sample[j] ]);
                args->aux.fams[i].sample[j] = k;
            }
        }
        uint8_t *ploidy = (uint8_t*) calloc(bcf_hdr_nsamples(args->aux.hdr), 1);
        for (i=0; i<args->nsamples; i++)    // i index in -s sample list
        {
            int j = bcf_hdr_id2int(args->aux.hdr, BCF_DT_SAMPLE, args->samples[i]);     // j index in the output VCF / subset VCF
            if ( j<0 )
            {
                fprintf(stderr,"Warning: no such sample: \"%s\"\n", args->samples[i]);
                continue;
            }
            ploidy[j] = args->aux.ploidy[i];
        }
        args->nsamples = bcf_hdr_nsamples(args->aux.hdr);
        for (i=0; i<args->nsamples; i++)
            assert( ploidy[i]==0 || ploidy[i]==1 || ploidy[i]==2 );
        free(args->aux.ploidy);
        args->aux.ploidy = ploidy;
    }

    args->out_fh = hts_open(args->output_fname, hts_bcf_wmode(args->output_type));
    if ( args->out_fh == NULL ) error("Can't write to \"%s\": %s\n", args->output_fname, strerror(errno));

    if ( args->flag & CF_QCALL )
        return;

    if ( args->flag & CF_MCALL )
        mcall_init(&args->aux);

    if ( args->flag & CF_CCALL )
        ccall_init(&args->aux);

    if ( args->flag&CF_GVCF )
    {
        bcf_hdr_append(args->aux.hdr,"##INFO=<ID=END,Number=1,Type=Integer,Description=\"End position of the variant described in this record\">");
        args->gvcf.rid  = -1;
        args->gvcf.line = bcf_init1();
        args->gvcf.gt   = (int32_t*) malloc(2*sizeof(int32_t)*bcf_hdr_nsamples(args->aux.hdr));
        for (i=0; i<bcf_hdr_nsamples(args->aux.hdr); i++)
        {
            args->gvcf.gt[2*i+0] = bcf_gt_unphased(0);
            args->gvcf.gt[2*i+1] = bcf_gt_unphased(0);
        }
    }

    bcf_hdr_remove(args->aux.hdr, BCF_HL_INFO, "QS");
    bcf_hdr_remove(args->aux.hdr, BCF_HL_INFO, "I16");

    bcf_hdr_append_version(args->aux.hdr, args->argc, args->argv, "bcftools_call");
    bcf_hdr_write(args->out_fh, args->aux.hdr);

    if ( args->flag&CF_INS_MISSED ) init_missed_line(args);
}
Exemplo n.º 3
0
int main_vcfisec(int argc, char *argv[])
{
    int c;
    args_t *args = (args_t*) calloc(1,sizeof(args_t));
    args->files  = bcf_sr_init();
    args->argc   = argc; args->argv = argv;
    args->output_fname = NULL;
    args->output_type = FT_VCF;
    args->n_threads = 0;
    args->record_cmd_line = 1;
    int targets_is_file = 0, regions_is_file = 0;

    static struct option loptions[] =
    {
        {"help",no_argument,NULL,'h'},
        {"exclude",required_argument,NULL,'e'},
        {"include",required_argument,NULL,'i'},
        {"collapse",required_argument,NULL,'c'},
        {"complement",no_argument,NULL,'C'},
        {"apply-filters",required_argument,NULL,'f'},
        {"nfiles",required_argument,NULL,'n'},
        {"prefix",required_argument,NULL,'p'},
        {"write",required_argument,NULL,'w'},
        {"targets",required_argument,NULL,'t'},
        {"targets-file",required_argument,NULL,'T'},
        {"regions",required_argument,NULL,'r'},
        {"regions-file",required_argument,NULL,'R'},
        {"output",required_argument,NULL,'o'},
        {"output-type",required_argument,NULL,'O'},
        {"threads",required_argument,NULL,9},
        {"no-version",no_argument,NULL,8},
        {NULL,0,NULL,0}
    };
    while ((c = getopt_long(argc, argv, "hc:r:R:p:n:w:t:T:Cf:o:O:i:e:",loptions,NULL)) >= 0) {
        switch (c) {
            case 'o': args->output_fname = optarg; break;
            case 'O':
                switch (optarg[0]) {
                    case 'b': args->output_type = FT_BCF_GZ; break;
                    case 'u': args->output_type = FT_BCF; break;
                    case 'z': args->output_type = FT_VCF_GZ; break;
                    case 'v': args->output_type = FT_VCF; break;
                    default: error("The output type \"%s\" not recognised\n", optarg);
                }
                break;
            case 'c':
                if ( !strcmp(optarg,"snps") ) args->files->collapse |= COLLAPSE_SNPS;
                else if ( !strcmp(optarg,"indels") ) args->files->collapse |= COLLAPSE_INDELS;
                else if ( !strcmp(optarg,"both") ) args->files->collapse |= COLLAPSE_SNPS | COLLAPSE_INDELS;
                else if ( !strcmp(optarg,"any") ) args->files->collapse |= COLLAPSE_ANY;
                else if ( !strcmp(optarg,"all") ) args->files->collapse |= COLLAPSE_ANY;
                else if ( !strcmp(optarg,"some") ) args->files->collapse |= COLLAPSE_SOME;
                else if ( !strcmp(optarg,"none") ) args->files->collapse = COLLAPSE_NONE;
                else error("The --collapse string \"%s\" not recognised.\n", optarg);
                break;
            case 'f': args->files->apply_filters = optarg; break;
            case 'C': args->isec_op = OP_COMPLEMENT; break;
            case 'r': args->regions_list = optarg; break;
            case 'R': args->regions_list = optarg; regions_is_file = 1; break;
            case 't': args->targets_list = optarg; break;
            case 'T': args->targets_list = optarg; targets_is_file = 1; break;
            case 'p': args->prefix = optarg; break;
            case 'w': args->write_files = optarg; break;
            case 'i': add_filter(args, optarg, FLT_INCLUDE); break;
            case 'e': add_filter(args, optarg, FLT_EXCLUDE); break;
            case 'n':
                {
                    char *p = optarg;
                    if ( *p=='-' ) { args->isec_op = OP_MINUS; p++; }
                    else if ( *p=='+' ) { args->isec_op = OP_PLUS; p++; }
                    else if ( *p=='=' ) { args->isec_op = OP_EQUAL; p++; }
                    else if ( *p=='~' ) { args->isec_op = OP_EXACT; p++; }
                    else if ( isdigit(*p) ) args->isec_op = OP_EQUAL;
                    else error("Could not parse --nfiles %s\n", optarg);
                    if ( args->isec_op == OP_EXACT ) args->isec_exact = p;
                    else if ( sscanf(p,"%d",&args->isec_n)!=1 ) error("Could not parse --nfiles %s\n", optarg);
                }
                break;
            case  9 : args->n_threads = strtol(optarg, 0, 0); break;
            case  8 : args->record_cmd_line = 0; break;
            case 'h':
            case '?': usage();
            default: error("Unknown argument: %s\n", optarg);
        }
    }
    if ( argc-optind<1 ) usage();   // no file given
    if ( args->targets_list && bcf_sr_set_targets(args->files, args->targets_list, targets_is_file,0)<0 )
        error("Failed to read the targets: %s\n", args->targets_list);
    if ( args->regions_list && bcf_sr_set_regions(args->files, args->regions_list, regions_is_file)<0 )
        error("Failed to read the regions: %s\n", args->regions_list);
    if ( argc-optind==2 && !args->isec_op )
    {
        args->isec_op = OP_VENN;
        if ( !args->prefix ) error("Expected the -p option\n");
    }
    if ( !args->targets_list )
    {
        if ( argc-optind<2  ) error("Expected multiple files or the --targets option\n");
        if ( !args->isec_op ) error("Expected two file names or one of the options --complement, --nfiles or --targets\n");
    }
    args->files->require_index = 1;
    while (optind<argc)
    {
        if ( !bcf_sr_add_reader(args->files, argv[optind]) ) error("Failed to open %s: %s\n", argv[optind],bcf_sr_strerror(args->files->errnum));
        optind++;
    }
    init_data(args);
    isec_vcf(args);
    destroy_data(args);
    bcf_sr_destroy(args->files);
    free(args);
    return 0;
}
Exemplo n.º 4
0
static void init_data(args_t *args)
{
    bcf_srs_t *files = bcf_sr_init();
    if ( args->regions_list )
    {
        if ( bcf_sr_set_regions(files, args->regions_list, args->regions_is_file)<0 )
            error("Failed to read the regions: %s\n", args->regions_list);
    }
    if ( args->targets_list )
    {
        if ( bcf_sr_set_targets(files, args->targets_list, args->targets_is_file, 0)<0 )
            error("Failed to read the targets: %s\n", args->targets_list);
    }
    if ( !bcf_sr_add_reader(files, args->fname) ) error("Failed to open %s: %s\n", args->fname,bcf_sr_strerror(files->errnum));
    bcf_hdr_t *hdr = files->readers[0].header;
    if ( !args->sample )
    {
        if ( bcf_hdr_nsamples(hdr)>1 ) error("Missing the option -s, --sample\n");
        args->sample = hdr->samples[0];
    }
    else if ( bcf_hdr_id2int(hdr,BCF_DT_SAMPLE,args->sample)<0 ) error("No such sample: %s\n", args->sample);
    int ret = bcf_hdr_set_samples(hdr, args->sample, 0);
    if ( ret<0 ) error("Error setting the sample: %s\n", args->sample);

    if ( !bcf_hdr_idinfo_exists(hdr,BCF_HL_FMT,bcf_hdr_id2int(hdr,BCF_DT_ID,"BAF")) )
        error("The tag FORMAT/BAF is not present in the VCF: %s\n", args->fname);

    int i;
    args->xvals = (double*) calloc(args->nbins,sizeof(double));
    for (i=0; i<args->nbins; i++) args->xvals[i] = 1.0*i/(args->nbins-1);

    // collect BAF distributions for all chromosomes
    int idist = -1, nbaf = 0, nprocessed = 0, ntotal = 0, prev_chr = -1;
    float *baf = NULL;
    while ( bcf_sr_next_line(files) )
    {
        ntotal++;

        bcf1_t *line = bcf_sr_get_line(files,0);
        if ( bcf_get_format_float(hdr,line,"BAF",&baf,&nbaf) != 1 ) continue;
        if ( bcf_float_is_missing(baf[0]) ) continue;

        nprocessed++;

        if ( prev_chr==-1 || prev_chr!=line->rid )
        {
            // new chromosome
            idist = args->ndist++;
            args->dist = (dist_t*) realloc(args->dist, sizeof(dist_t)*args->ndist);
            memset(&args->dist[idist],0,sizeof(dist_t));
            args->dist[idist].chr   = strdup(bcf_seqname(hdr,line));
            args->dist[idist].yvals = (double*) calloc(args->nbins,sizeof(double));
            args->dist[idist].xvals = args->xvals;
            args->dist[idist].nvals = args->nbins;
            prev_chr = line->rid;
        }
        int bin = baf[0]*(args->nbins-1);
        args->dist[idist].yvals[bin]++;   // the distribution
    }
    free(baf);
    bcf_sr_destroy(files);

    for (idist=0; idist<args->ndist; idist++)
    {
        #if 0
            int j;
            for (j=0; j<args->nbins; j++)
            {
                double x = args->dist[idist].xvals[j];
                args->dist[idist].yvals[j] = exp(-(x-0.5)*(x-0.5)/1e-3);
            }
        #endif
        init_dist(args, &args->dist[idist],args->verbose);
    }

    args->dat_fp = open_file(&args->dat_fname,"w","%s/dist.dat", args->output_dir);
    fprintf(args->dat_fp, "# This file was produced by: bcftools polysomy(%s+htslib-%s), the command line was:\n", bcftools_version(),hts_version());
    fprintf(args->dat_fp, "# \t bcftools %s ", args->argv[0]);
    for (i=1; i<args->argc; i++)
        fprintf(args->dat_fp, " %s",args->argv[i]);
    fprintf(args->dat_fp,"\n#\n");
    fprintf(args->dat_fp,"# DIST\t[2]Chrom\t[3]BAF\t[4]Normalized Count\n");
    fprintf(args->dat_fp,"# FIT\t[2]Goodness of Fit\t[3]iFrom\t[4]iTo\t[5]The Fitted Function\n");
    fprintf(args->dat_fp,"# CN\t[2]Chrom\t[3]Estimated Copy Number\t[4]Absolute fit deviation\n");

    char *fname = NULL;
    FILE *fp = open_file(&fname,"w","%s/dist.py", args->output_dir);
//-------- matplotlib script --------------
    fprintf(fp,
        "#!/usr/bin/env python\n"
        "#\n"
        "import matplotlib as mpl\n"
        "mpl.use('Agg')\n"
        "import matplotlib.pyplot as plt\n"
        "import csv,sys,argparse\n"
        "from math import exp\n"
        "\n"
        "outdir = '%s'\n"
        "\n"
        "def read_dat(dat,fit,cn):\n"
        "   csv.register_dialect('tab', delimiter='\t', quoting=csv.QUOTE_NONE)\n"
        "   with open(outdir+'/dist.dat', 'rb') as f:\n"
        "      reader = csv.reader(f, 'tab')\n"
        "      for row in reader:\n"
        "          if row[0][0]=='#': continue\n"
        "          type = row[0]\n"
        "          chr  = row[1]\n"
        "          if type=='DIST':\n"
        "              if chr not in dat: dat[chr] = []\n"
        "              dat[chr].append(row)\n"
        "          elif type=='FIT':\n"
        "              if chr not in fit: fit[chr] = []\n"
        "              fit[chr].append(row)\n"
        "          elif type=='CN':\n"
        "              cn[chr] = row[2]\n"
        "\n"
        "def plot_dist(dat,fit,chr):\n"
        "   fig, ax = plt.subplots(1, 1, figsize=(7,5))\n"
        "   ax.plot([x[2] for x in dat[chr]],[x[3] for x in dat[chr]],'k-',label='Distribution')\n"
        "   if chr in fit:\n"
        "       for i in range(len(fit[chr])):\n"
        "           pfit = fit[chr][i]\n"
        "           exec('def xfit(x): return '+pfit[5])\n"
        "           istart = int(pfit[3])\n"
        "           iend   = int(pfit[4])+1\n"
        "           vals   = dat[chr][istart:iend]\n"
        "           args   = {}\n"
        "           if i==0: args = {'label':'Target to Fit'}\n"
        "           ax.plot([x[2] for x in vals],[x[3] for x in vals],'r-',**args)\n"
        "           if i==0: args = {'label':'Best Fit'}\n"
        "           ax.plot([x[2] for x in vals],[xfit(float(x[2])) for x in vals],'g-',**args)\n"
        "   ax.set_title('BAF distribution, chr'+chr)\n"
        "   ax.set_xlabel('BAF')\n"
        "   ax.set_ylabel('Frequency')\n"
        "   ax.legend(loc='best',prop={'size':7},frameon=False)\n"
        "   plt.savefig(outdir+'/dist.chr'+chr+'.png')\n"
        "   plt.close()\n"
        "\n"
        "def plot_copy_number(cn):\n"
        "   fig, ax = plt.subplots(1, 1, figsize=(7,5))\n"
        "   xlabels = sorted(cn.keys())\n"
        "   xvals = range(len(xlabels))\n"
        "   yvals = [float(cn[x]) for x in xlabels]\n"
        "   ax.plot(xvals,yvals,'o',color='red')\n"
        "   for i in range(len(xvals)):\n"
        "       if yvals[i]==-1: ax.annotate('?', xy=(xvals[i],0.5),va='center',ha='center',color='red',fontweight='bold')\n"
        "   ax.tick_params(axis='both', which='major', labelsize=9)\n"
        "   ax.set_xticks(xvals)\n"
        "   ax.set_xticklabels(xlabels,rotation=45)\n"
        "   ax.set_xlim(-1,len(xlabels))\n"
        "   ax.set_ylim(0,5.0)\n"
        "   ax.set_yticks([1.0,2.0,3.0,4.0])\n"
        "   ax.set_xlabel('Chromosome')\n"
        "   ax.set_ylabel('Copy Number')\n"
        "   plt.savefig(outdir+'/copy-number.png')\n"
        "   plt.close()\n"
        "\n"
        "class myParser(argparse.ArgumentParser):\n"
        "   def error(self, message):\n"
        "       self.print_help()\n"
        "       sys.stderr.write('error: %%s\\n' %% message)\n"
        "       sys.exit(2)\n"
        "\n"
        "def main():\n"
        "   parser = myParser()\n"
        "   parser.add_argument('-a', '--all', action='store_true', help='Create all plots')\n"
        "   parser.add_argument('-c', '--copy-number', action='store_true', help='Create copy-number plot')\n"
        "   parser.add_argument('-d', '--distrib', metavar='CHR', help='Plot BAF distribution of a single chromosome')\n"
        "   args = parser.parse_args()\n"
        "   dat = {}; fit = {}; cn = {}\n"
        "   read_dat(dat,fit,cn)\n"
        "   if args.distrib!=None:\n"
        "       plot_dist(dat,fit,args.distrib)\n"
        "   if args.all:\n"
        "       for chr in dat: plot_dist(dat,fit,chr)\n"
        "       plot_copy_number(cn)\n"
        "   elif args.copy_number:\n"
        "       plot_copy_number(cn)\n"
        "   else:\n"
        "       for chr in dat: plot_dist(dat,fit,chr)\n"
        "\n"
        "if __name__ == '__main__':\n"
        "   main()\n",
        args->output_dir);
//---------------------------------------
    chmod(fname, S_IWUSR|S_IRUSR|S_IRGRP|S_IROTH|S_IXUSR|S_IXGRP|S_IXOTH);
    free(fname);
    fclose(fp);
}
Exemplo n.º 5
0
static void init_data(args_t *args)
{
    args->sr = bcf_sr_init();
    if ( args->region )
    {
        args->sr->require_index = 1;
        if ( bcf_sr_set_regions(args->sr, args->region, args->region_is_file)<0 ) error("Failed to read the regions: %s\n",args->region);
    }
    if ( args->target && bcf_sr_set_targets(args->sr, args->target, args->target_is_file, 0)<0 ) error("Failed to read the targets: %s\n",args->target);
    if ( !bcf_sr_add_reader(args->sr,args->fname) ) error("Error: %s\n", bcf_sr_strerror(args->sr->errnum));
    args->hdr_in  = bcf_sr_get_header(args->sr,0);
    args->hdr_out = bcf_hdr_dup(args->hdr_in);

    if ( args->filter_str )
        args->filter = filter_init(args->hdr_in, args->filter_str);

    mkdir_p("%s/",args->output_dir);

    int i, nsmpl = bcf_hdr_nsamples(args->hdr_in);
    if ( !nsmpl ) error("No samples to split: %s\n", args->fname);
    args->fh = (htsFile**)calloc(nsmpl,sizeof(*args->fh));
    args->bnames = set_file_base_names(args);
    kstring_t str = {0,0,0};
    for (i=0; i<nsmpl; i++)
    {
        if ( !args->bnames[i] ) continue;
        str.l = 0;
        kputs(args->output_dir, &str);
        if ( str.s[str.l-1] != '/' ) kputc('/', &str);
        int k, l = str.l;
        kputs(args->bnames[i], &str);
        for (k=l; k<str.l; k++) if ( isspace(str.s[k]) ) str.s[k] = '_';
        if ( args->output_type & FT_BCF ) kputs(".bcf", &str);
        else if ( args->output_type & FT_GZ ) kputs(".vcf.gz", &str);
        else kputs(".vcf", &str);
        args->fh[i] = hts_open(str.s, hts_bcf_wmode(args->output_type));
        if ( args->fh[i] == NULL ) error("Can't write to \"%s\": %s\n", str.s, strerror(errno));
        bcf_hdr_nsamples(args->hdr_out) = 1;
        args->hdr_out->samples[0] = args->bnames[i];
        bcf_hdr_write(args->fh[i], args->hdr_out);
    }
    free(str.s);

    // parse tags
    int is_info = 0, is_fmt = 0;
    char *beg = args->keep_tags;
    while ( beg && *beg )
    {
        if ( !strncasecmp("INFO/",beg,5) ) { is_info = 1; is_fmt = 0; beg += 5; }
        else if ( !strcasecmp("INFO",beg) ) { args->keep_info = 1; break; }
        else if ( !strncasecmp("INFO,",beg,5) ) { args->keep_info = 1; beg += 5; continue; }
        else if ( !strncasecmp("FMT/",beg,4) ) { is_info = 0; is_fmt = 1; beg += 4; }
        else if ( !strncasecmp("FORMAT/",beg,7) ) { is_info = 0; is_fmt = 1; beg += 7; }
        else if ( !strcasecmp("FMT",beg) ) { args->keep_fmt = 1; break; }
        else if ( !strcasecmp("FORMAT",beg) ) { args->keep_fmt = 1; break; }
        else if ( !strncasecmp("FMT,",beg,4) ) { args->keep_fmt = 1; beg += 4; continue; }
        else if ( !strncasecmp("FORMAT,",beg,7) ) { args->keep_fmt = 1; beg += 7; continue; }
        char *end = beg;
        while ( *end && *end!=',' ) end++;
        char tmp = *end; *end = 0;
        int id = bcf_hdr_id2int(args->hdr_in, BCF_DT_ID, beg);
        beg = tmp ? end + 1 : end;
        if ( is_info && bcf_hdr_idinfo_exists(args->hdr_in,BCF_HL_INFO,id) )
        {
            if ( id >= args->ninfo_tags ) args->ninfo_tags = id + 1;
            hts_expand0(uint8_t, args->ninfo_tags, args->minfo_tags, args->info_tags);
            args->info_tags[id] = 1;
        }
        if ( is_fmt && bcf_hdr_idinfo_exists(args->hdr_in,BCF_HL_FMT,id) )
        {
            if ( id >= args->nfmt_tags ) args->nfmt_tags = id + 1;
            hts_expand0(uint8_t, args->nfmt_tags, args->mfmt_tags, args->fmt_tags);
            args->fmt_tags[id] = 1;
        }
    }
    if ( !args->keep_info && !args->keep_fmt && !args->ninfo_tags && !args->nfmt_tags )
    {
        args->keep_info = args->keep_fmt = 1;
    }
}
Exemplo n.º 6
0
int main_vcfgtcheck(int argc, char *argv[])
{
    int c;
    args_t *args = (args_t*) calloc(1,sizeof(args_t));
    args->files  = bcf_sr_init();
    args->argc   = argc; args->argv = argv; set_cwd(args);
    char *regions = NULL, *targets = NULL;
    int regions_is_file = 0, targets_is_file = 0;

    static struct option loptions[] =
    {
        {"GTs-only",1,0,'G'},
        {"all-sites",0,0,'a'},
        {"homs-only",0,0,'H'},
        {"help",0,0,'h'},
        {"genotypes",1,0,'g'},
        {"plot",1,0,'p'},
        {"target-sample",1,0,'S'},
        {"query-sample",1,0,'s'},
        {"regions",1,0,'r'},
        {"regions-file",1,0,'R'},
        {"targets",1,0,'t'},
        {"targets-file",1,0,'T'},
        {0,0,0,0}
    };
    char *tmp;
    while ((c = getopt_long(argc, argv, "hg:p:s:S:Hr:R:at:T:G:",loptions,NULL)) >= 0) {
        switch (c) {
            case 'G':
                args->no_PLs = strtol(optarg,&tmp,10);
                if ( *tmp ) error("Could not parse argument: --GTs-only %s\n", optarg);
                break;
            case 'a': args->all_sites = 1; break;
            case 'H': args->hom_only = 1; break;
            case 'g': args->gt_fname = optarg; break;
            case 'p': args->plot = optarg; break;
            case 'S': args->target_sample = optarg; break;
            case 's': args->query_sample = optarg; break;
            case 'r': regions = optarg; break;
            case 'R': regions = optarg; regions_is_file = 1; break;
            case 't': targets = optarg; break;
            case 'T': targets = optarg; targets_is_file = 1; break;
            case 'h':
            case '?': usage();
            default: error("Unknown argument: %s\n", optarg);
        }
    }
    char *fname = NULL;
    if ( optind==argc )
    {
        if ( !isatty(fileno((FILE *)stdin)) ) fname = "-";  // reading from stdin
        else usage();   // no files given
    }
    else fname = argv[optind];
    if ( argc>optind+1 )  usage();  // too many files given
    if ( !args->gt_fname ) args->cross_check = 1;   // no genotype file, run in cross-check mode
    else args->files->require_index = 1;
    if ( regions && bcf_sr_set_regions(args->files, regions, regions_is_file)<0 ) error("Failed to read the regions: %s\n", regions);
    if ( targets && bcf_sr_set_targets(args->files, targets, targets_is_file, 0)<0 ) error("Failed to read the targets: %s\n", targets);
    if ( !bcf_sr_add_reader(args->files, fname) ) error("Failed to open %s: %s\n", fname,bcf_sr_strerror(args->files->errnum));
    if ( args->gt_fname && !bcf_sr_add_reader(args->files, args->gt_fname) ) error("Failed to open %s: %s\n", args->gt_fname,bcf_sr_strerror(args->files->errnum));
    args->files->collapse = COLLAPSE_SNPS|COLLAPSE_INDELS;
    if ( args->plot ) args->plot = init_prefix(args->plot);
    init_data(args);
    if ( args->cross_check )
        cross_check_gts(args);
    else
        check_gt(args);
    destroy_data(args);
    bcf_sr_destroy(args->files);
    if (args->plot) free(args->plot);
    free(args);
    return 0;
}
Exemplo n.º 7
0
int main_plugin(int argc, char *argv[])
{
    int c;
    args_t *args  = (args_t*) calloc(1,sizeof(args_t));
    args->argc    = argc; args->argv = argv;
    args->files   = bcf_sr_init();
    args->output_fname = "-";
    args->output_type = FT_VCF;
    args->nplugin_paths = -1;
    int regions_is_file = 0, targets_is_file = 0, plist_only = 0;

    if ( argc==1 ) usage(args);
    char *plugin_name = NULL;
    if ( argv[1][0]!='-' ) { plugin_name = argv[1]; argc--; argv++; }

    static struct option loptions[] =
    {
        {"verbose",0,0,'v'},
        {"help",0,0,'h'},
        {"list-plugins",0,0,'l'},
        {"output",1,0,'o'},
        {"output-type",1,0,'O'},
        {"include",1,0,'i'},
        {"exclude",1,0,'e'},
        {"regions",1,0,'r'},
        {"regions-file",1,0,'R'},
        {"targets",1,0,'t'},
        {"targets-file",1,0,'T'},
        {0,0,0,0}
    };
    while ((c = getopt_long(argc, argv, "h?o:O:r:R:li:e:v",loptions,NULL)) >= 0)
    {
        switch (c) {
            case 'v': args->verbose = 1; break;
            case 'o': args->output_fname = optarg; break;
            case 'O':
                switch (optarg[0]) {
                    case 'b': args->output_type = FT_BCF_GZ; break;
                    case 'u': args->output_type = FT_BCF; break;
                    case 'z': args->output_type = FT_VCF_GZ; break;
                    case 'v': args->output_type = FT_VCF; break;
                    default: error("The output type \"%s\" not recognised\n", optarg);
                };
                break;
            case 'e': args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
            case 'i': args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
            case 'r': args->regions_list = optarg; break;
            case 'R': args->regions_list = optarg; regions_is_file = 1; break;
            case 't': args->targets_list = optarg; break;
            case 'T': args->targets_list = optarg; targets_is_file = 1; break;
            case 'l': plist_only = 1; break;
            case '?':
            case 'h': load_plugin(args, plugin_name, 1, &args->plugin); fprintf(stderr,"%s",args->plugin.usage()); return 0; break;
            default: error("Unknown argument: %s\n", optarg);
        }
    }
    if ( plist_only )  return list_plugins(args);

    char *fname = NULL;
    if ( optind>=argc || argv[optind][0]=='-' )
    {
        if ( !isatty(fileno((FILE *)stdin)) ) fname = "-";  // reading from stdin
        else usage(args);
        args->plugin.argc = argc - optind + 1;
        args->plugin.argv = argv + optind - 1;
    }
    else
    {
        fname = argv[optind];
        args->plugin.argc = argc - optind;
        args->plugin.argv = argv + optind;
    }
    optind = 0;
    args->plugin.argv[0] = plugin_name;
    load_plugin(args, plugin_name, 1, &args->plugin);

    if ( args->regions_list )
    {
        if ( bcf_sr_set_regions(args->files, args->regions_list, regions_is_file)<0 )
            error("Failed to read the regions: %s\n", args->regions_list);
    }
    if ( args->targets_list )
    {
        if ( bcf_sr_set_targets(args->files, args->targets_list, targets_is_file, 0)<0 )
            error("Failed to read the targets: %s\n", args->targets_list);
        args->files->collapse |= COLLAPSE_SOME;
    }
    if ( !bcf_sr_add_reader(args->files, fname) ) error("Failed to open or the file not indexed: %s\n", fname);

    init_data(args);
    while ( bcf_sr_next_line(args->files) )
    {
        bcf1_t *line = bcf_sr_get_line(args->files,0);
        if ( args->filter )
        {
            int pass = filter_test(args->filter, line, NULL);
            if ( args->filter_logic & FLT_EXCLUDE ) pass = pass ? 0 : 1;
            if ( !pass ) continue;
        }
        line = args->plugin.process(line);
        if ( line ) bcf_write1(args->out_fh, args->hdr_out, line);
    }
    destroy_data(args);
    bcf_sr_destroy(args->files);
    free(args);
    return 0;
}
Exemplo n.º 8
0
int main_vcfview(int argc, char *argv[])
{
    int c;
    args_t *args  = (args_t*) calloc(1,sizeof(args_t));
    args->argc    = argc; args->argv = argv;
    args->files   = bcf_sr_init();
    args->clevel  = -1;
    args->print_header = 1;
    args->update_info = 1;
    args->output_type = FT_VCF;
    int targets_is_file = 0, regions_is_file = 0;

    static struct option loptions[] =
    {
        {"genotype",1,0,'g'},
        {"compression-level",1,0,'l'},
        {"header-only",0,0,'h'},
        {"no-header",0,0,'H'},
        {"exclude",1,0,'e'},
        {"include",1,0,'i'},
        {"trim-alt-alleles",0,0,'a'},
        {"no-update",0,0,'I'},
        {"drop-genotypes",0,0,'G'},
        {"private",0,0,'x'},
        {"exclude-private",0,0,'X'},
        {"uncalled",0,0,'u'},
        {"exclude-uncalled",0,0,'U'},
        {"apply-filters",1,0,'f'},
        {"known",0,0,'k'},
        {"novel",0,0,'n'},
        {"min-alleles",1,0,'m'},
        {"max-alleles",1,0,'M'},
        {"samples",1,0,'s'},
        {"samples-file",1,0,'S'},
        {"force-samples",0,0,1},
        {"output-type",1,0,'O'},
        {"output-file",1,0,'o'},
        {"types",1,0,'v'},
        {"exclude-types",1,0,'V'},
        {"targets",1,0,'t'},
        {"targets-file",1,0,'T'},
        {"regions",1,0,'r'},
        {"regions-file",1,0,'R'},
        {"min-ac",1,0,'c'},
        {"max-ac",1,0,'C'},
        {"min-af",1,0,'q'},
        {"max-af",1,0,'Q'},
        {"phased",0,0,'p'},
        {"exclude-phased",0,0,'P'},
        {0,0,0,0}
    };
    char *tmp;
    while ((c = getopt_long(argc, argv, "l:t:T:r:R:o:O:s:S:Gf:knv:V:m:M:auUhHc:C:Ii:e:xXpPq:Q:g:",loptions,NULL)) >= 0)
    {
        char allele_type[8] = "nref";
        switch (c)
        {
            case 'O':
                switch (optarg[0]) {
                    case 'b': args->output_type = FT_BCF_GZ; break;
                    case 'u': args->output_type = FT_BCF; break;
                    case 'z': args->output_type = FT_VCF_GZ; break;
                    case 'v': args->output_type = FT_VCF; break;
                    default: error("The output type \"%s\" not recognised\n", optarg);
                };
                break;
            case 'l':
                args->clevel = strtol(optarg,&tmp,10);
                if ( *tmp ) error("Could not parse argument: --compression-level %s\n", optarg);
                args->output_type |= FT_GZ; 
                break;
            case 'o': args->fn_out = optarg; break;
            case 'H': args->print_header = 0; break;
            case 'h': args->header_only = 1; break;

            case 't': args->targets_list = optarg; break;
            case 'T': args->targets_list = optarg; targets_is_file = 1; break;
            case 'r': args->regions_list = optarg; break;
            case 'R': args->regions_list = optarg; regions_is_file = 1; break;

            case 's': args->sample_names = optarg; break;
            case 'S': args->sample_names = optarg; args->sample_is_file = 1; break;
            case  1 : args->force_samples = 1; break;
            case 'a': args->trim_alts = 1; args->calc_ac = 1; break;
            case 'I': args->update_info = 0; break;
            case 'G': args->sites_only = 1; break;

            case 'f': args->files->apply_filters = optarg; break;
            case 'k': args->known = 1; break;
            case 'n': args->novel = 1; break;
            case 'm':
                args->min_alleles = strtol(optarg,&tmp,10);
                if ( *tmp ) error("Could not parse argument: --min-alleles %s\n", optarg);
                break;
            case 'M': 
                args->max_alleles = strtol(optarg,&tmp,10);
                if ( *tmp ) error("Could not parse argument: --max-alleles %s\n", optarg);
                break;
            case 'v': args->include_types = optarg; break;
            case 'V': args->exclude_types = optarg; break;
            case 'e': args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
            case 'i': args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;

            case 'c':
            {
                args->min_ac_type = ALLELE_NONREF;
                if ( sscanf(optarg,"%d:%s",&args->min_ac, allele_type)!=2 && sscanf(optarg,"%d",&args->min_ac)!=1 )
                    error("Error: Could not parse --min-ac %s\n", optarg);
                set_allele_type(&args->min_ac_type, allele_type);
                args->calc_ac = 1;
                break;
            }
            case 'C':
            {
                args->max_ac_type = ALLELE_NONREF;
                if ( sscanf(optarg,"%d:%s",&args->max_ac, allele_type)!=2 && sscanf(optarg,"%d",&args->max_ac)!=1 )
                    error("Error: Could not parse --max-ac %s\n", optarg);
                set_allele_type(&args->max_ac_type, allele_type);
                args->calc_ac = 1;
                break;
            }
            case 'q':
            {
                args->min_af_type = ALLELE_NONREF;
                if ( sscanf(optarg,"%f:%s",&args->min_af, allele_type)!=2 && sscanf(optarg,"%f",&args->min_af)!=1 )
                    error("Error: Could not parse --min_af %s\n", optarg);
                set_allele_type(&args->min_af_type, allele_type);
                args->calc_ac = 1;
                break;
            }
            case 'Q':
            {
                args->max_af_type = ALLELE_NONREF;
                if ( sscanf(optarg,"%f:%s",&args->max_af, allele_type)!=2 && sscanf(optarg,"%f",&args->max_af)!=1 )
                    error("Error: Could not parse --min_af %s\n", optarg);
                set_allele_type(&args->max_af_type, allele_type);
                args->calc_ac = 1;
                break;
            }

            case 'x': args->private_vars |= FLT_INCLUDE; args->calc_ac = 1; break;
            case 'X': args->private_vars |= FLT_EXCLUDE; args->calc_ac = 1; break;
            case 'u': args->uncalled |= FLT_INCLUDE; args->calc_ac = 1; break;
            case 'U': args->uncalled |= FLT_EXCLUDE; args->calc_ac = 1; break;
            case 'p': args->phased |= FLT_INCLUDE; break; // phased
            case 'P': args->phased |= FLT_EXCLUDE; break; // exclude-phased
            case 'g':
            {
                if ( !strcasecmp(optarg,"hom") ) args->gt_type = GT_NEED_HOM;
                else if ( !strcasecmp(optarg,"het") ) args->gt_type = GT_NEED_HET;
                else if ( !strcasecmp(optarg,"miss") ) args->gt_type = GT_NEED_MISSING;
                else if ( !strcasecmp(optarg,"^hom") ) args->gt_type = GT_NO_HOM;
                else if ( !strcasecmp(optarg,"^het") ) args->gt_type = GT_NO_HET;
                else if ( !strcasecmp(optarg,"^miss") ) args->gt_type = GT_NO_MISSING;
                else error("The argument to -g not recognised. Expected one of hom/het/miss/^hom/^het/^miss, got \"%s\".\n", optarg);
                break;
            }
            case '?': usage(args);
            default: error("Unknown argument: %s\n", optarg);
        }
    }

    if ( args->filter_logic == (FLT_EXCLUDE|FLT_INCLUDE) ) error("Only one of -i or -e can be given.\n");
    if ( args->private_vars > FLT_EXCLUDE ) error("Only one of -x or -X can be given.\n");
    if ( args->uncalled > FLT_EXCLUDE ) error("Only one of -u or -U can be given.\n");
    if ( args->phased > FLT_EXCLUDE ) error("Only one of -p or -P can be given.\n");

    if ( args->sample_names && args->update_info) args->calc_ac = 1;

    char *fname = NULL;
    if ( optind>=argc )
    {
        if ( !isatty(fileno((FILE *)stdin)) ) fname = "-";  // reading from stdin
        else usage(args);
    }
    else fname = argv[optind];

    // read in the regions from the command line
    if ( args->regions_list )
    {
        if ( bcf_sr_set_regions(args->files, args->regions_list, regions_is_file)<0 )
            error("Failed to read the regions: %s\n", args->regions_list);
    }
    else if ( optind+1 < argc )
    {
        int i;
        kstring_t tmp = {0,0,0};
        kputs(argv[optind+1],&tmp);
        for (i=optind+2; i<argc; i++) { kputc(',',&tmp); kputs(argv[i],&tmp); }
        if ( bcf_sr_set_regions(args->files, tmp.s, 0)<0 )
            error("Failed to read the regions: %s\n", tmp.s);
        free(tmp.s);
    }
    if ( args->targets_list )
    {
        if ( bcf_sr_set_targets(args->files, args->targets_list, targets_is_file, 0)<0 )
            error("Failed to read the targets: %s\n", args->targets_list);
    }

    if ( !bcf_sr_add_reader(args->files, fname) ) error("Failed to open %s: %s\n", fname,bcf_sr_strerror(args->files->errnum));

    init_data(args);
    bcf_hdr_t *out_hdr = args->hnull ? args->hnull : (args->hsub ? args->hsub : args->hdr);
    if (args->print_header)
        bcf_hdr_write(args->out, out_hdr);
    else if ( args->output_type & FT_BCF )
        error("BCF output requires header, cannot proceed with -H\n");
    if (!args->header_only)
    {
        while ( bcf_sr_next_line(args->files) )
        {
            bcf1_t *line = args->files->readers[0].buffer[0];
            if ( line->errcode && out_hdr!=args->hdr ) error("Undefined tags in the header, cannot proceed in the sample subset mode.\n");
            if ( subset_vcf(args, line) )
                bcf_write1(args->out, out_hdr, line);
        }
    }
    hts_close(args->out);
    destroy_data(args);
    bcf_sr_destroy(args->files);
    free(args);
    return 0;
}
Exemplo n.º 9
0
int main_vcfroh(int argc, char *argv[])
{
    int c;
    args_t *args  = (args_t*) calloc(1,sizeof(args_t));
    args->argc    = argc; args->argv = argv;
    args->files   = bcf_sr_init();
    args->t2AZ    = 6.7e-8;
    args->t2HW    = 5e-9;
    args->rec_rate = 0;
    int regions_is_file = 0, targets_is_file = 0;

    static struct option loptions[] =
    {
        {"AF-tag",1,0,0},
        {"AF-file",1,0,1},
        {"AF-dflt",1,0,2},
        {"estimate-AF",1,0,'e'},
        {"GTs-only",1,0,'G'},
        {"sample",1,0,'s'},
        {"hw-to-az",1,0,'a'},
        {"az-to-hw",1,0,'H'},
        {"viterbi-training",0,0,'V'},
        {"targets",1,0,'t'},
        {"targets-file",1,0,'T'},
        {"regions",1,0,'r'},
        {"regions-file",1,0,'R'},
        {"genetic-map",1,0,'m'},
        {"rec-rate",1,0,'M'},
        {"skip-indels",0,0,'I'},
        {0,0,0,0}
    };

    int naf_opts = 0;
    char *tmp;
    while ((c = getopt_long(argc, argv, "h?r:R:t:T:H:a:s:m:M:G:Ia:e:V",loptions,NULL)) >= 0) {
        switch (c) {
            case 0: args->af_tag = optarg; naf_opts++; break;
            case 1: args->af_fname = optarg; naf_opts++; break;
            case 2: 
                args->dflt_AF = strtod(optarg,&tmp);
                if ( *tmp ) error("Could not parse: --AF-dflt %s\n", optarg);
                break;
            case 'e': args->estimate_AF = optarg; naf_opts++; break;
            case 'I': args->snps_only = 1; break;
            case 'G':
                args->fake_PLs = 1; 
                args->unseen_PL = strtod(optarg,&tmp);
                if ( *tmp ) error("Could not parse: -G %s\n", optarg);
                args->unseen_PL = pow(10,-args->unseen_PL/10.); 
                break;
            case 'm': args->genmap_fname = optarg; break;
            case 'M':
                args->rec_rate = strtod(optarg,&tmp);
                if ( *tmp ) error("Could not parse: -M %s\n", optarg);
                break;
            case 's': args->sample = strdup(optarg); break;
            case 'a':
                args->t2AZ = strtod(optarg,&tmp);
                if ( *tmp ) error("Could not parse: -a %s\n", optarg);
                break;
            case 'H':
                args->t2HW = strtod(optarg,&tmp);
                if ( *tmp ) error("Could not parse: -H %s\n", optarg);
                break;
            case 't': args->targets_list = optarg; break;
            case 'T': args->targets_list = optarg; targets_is_file = 1; break;
            case 'r': args->regions_list = optarg; break;
            case 'R': args->regions_list = optarg; regions_is_file = 1; break;
            case 'V': args->vi_training = 1; break;
            case 'h': 
            case '?': usage(args); break;
            default: error("Unknown argument: %s\n", optarg);
        }
    }

    if ( argc<optind+1 ) usage(args);
    if ( args->t2AZ<0 || args->t2AZ>1 ) error("Error: The parameter --hw-to-az is not in [0,1]\n", args->t2AZ);
    if ( args->t2HW<0 || args->t2HW>1 ) error("Error: The parameter --az-to-hw is not in [0,1]\n", args->t2HW);
    if ( naf_opts>1 ) error("Error: The options --AF-tag, --AF-file and -e are mutually exclusive\n");
    if ( args->af_fname && args->targets_list ) error("Error: The options --AF-file and -t are mutually exclusive\n");
    if ( args->regions_list )
    {
        if ( bcf_sr_set_regions(args->files, args->regions_list, regions_is_file)<0 )
            error("Failed to read the regions: %s\n", args->regions_list);
    }
    if ( args->targets_list )
    {
        if ( bcf_sr_set_targets(args->files, args->targets_list, targets_is_file, 0)<0 )
            error("Failed to read the targets: %s\n", args->targets_list);
    }
    if ( args->af_fname )
    {
        if ( bcf_sr_set_targets(args->files, args->af_fname, 1, 3)<0 )
            error("Failed to read the targets: %s\n", args->af_fname);
    }
    if ( !bcf_sr_add_reader(args->files, argv[optind]) ) error("Failed to open %s: %s\n", argv[optind],bcf_sr_strerror(args->files->errnum));

    init_data(args);
    while ( bcf_sr_next_line(args->files) )
    {
        vcfroh(args, args->files->readers[0].buffer[0]);
    }
    vcfroh(args, NULL);
    fprintf(pysamerr,"Number of lines: total/processed: %d/%d\n", args->ntot,args->nused);
    destroy_data(args);
    free(args);
    return 0;
}
Exemplo n.º 10
0
int main_vcfquery(int argc, char *argv[])
{
    int c, collapse = 0;
    args_t *args = (args_t*) calloc(1,sizeof(args_t));
    args->argc   = argc; args->argv = argv;
    int regions_is_file = 0, targets_is_file = 0;

    static struct option loptions[] =
    {
        {"help",0,0,'h'},
        {"list-samples",0,0,'l'},
        {"include",1,0,'i'},
        {"exclude",1,0,'e'},
        {"format",1,0,'f'},
        {"output-file",1,0,'o'},
        {"regions",1,0,'r'},
        {"regions-file",1,0,'R'},
        {"targets",1,0,'t'},
        {"targets-file",1,0,'T'},
        {"annots",1,0,'a'},
        {"samples",1,0,'s'},
        {"samples-file",1,0,'S'},
        {"print-header",0,0,'H'},
        {"collapse",1,0,'c'},
        {"vcf-list",1,0,'v'},
        {"allow-undef-tags",0,0,'u'},
        {0,0,0,0}
    };
    while ((c = getopt_long(argc, argv, "hlr:R:f:a:s:S:Ht:T:c:v:i:e:o:u",loptions,NULL)) >= 0) {
        switch (c) {
            case 'o': args->fn_out = optarg; break;
            case 'f': args->format_str = strdup(optarg); break;
            case 'H': args->print_header = 1; break;
            case 'v': args->vcf_list = optarg; break;
            case 'c':
                if ( !strcmp(optarg,"snps") ) collapse |= COLLAPSE_SNPS;
                else if ( !strcmp(optarg,"indels") ) collapse |= COLLAPSE_INDELS;
                else if ( !strcmp(optarg,"both") ) collapse |= COLLAPSE_SNPS | COLLAPSE_INDELS;
                else if ( !strcmp(optarg,"any") ) collapse |= COLLAPSE_ANY;
                else if ( !strcmp(optarg,"all") ) collapse |= COLLAPSE_ANY;
                else if ( !strcmp(optarg,"some") ) collapse |= COLLAPSE_SOME;
                else error("The --collapse string \"%s\" not recognised.\n", optarg);
                break;
            case 'a':
                {
                    kstring_t str = {0,0,0};
                    kputs("%CHROM\t%POS\t%MASK\t%REF\t%ALT\t%", &str);
                    char *p = optarg;
                    while ( *p )
                    {
                        if ( *p==',' )
                            kputs("\t%", &str);
                        else
                            kputc(*p, &str);
                        p++;
                    }
                    kputc('\n', &str);
                    args->format_str = str.s;
                    break;
                }
            case 'e': args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
            case 'i': args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
            case 'r': args->regions_list = optarg; break;
            case 'R': args->regions_list = optarg; regions_is_file = 1; break;
            case 't': args->targets_list = optarg; break;
            case 'T': args->targets_list = optarg; targets_is_file = 1; break;
            case 'l': args->list_columns = 1; break;
            case 'u': args->allow_undef_tags = 1; break;
            case 's': args->sample_list = optarg; break;
            case 'S': args->sample_list = optarg; args->sample_is_file = 1; break;
            case 'h':
            case '?': usage();
            default: error("Unknown argument: %s\n", optarg);
        }
    }

    char *fname = NULL;
    if ( optind>=argc )
    {
        if ( !isatty(fileno((FILE *)stdin)) ) fname = "-";
    }
    else fname = argv[optind];

    if ( args->list_columns )
    {
        if ( !fname ) error("Missing the VCF file name\n");
        args->files = bcf_sr_init();
        if ( !bcf_sr_add_reader(args->files, fname) ) error("Failed to open %s: %s\n", fname,bcf_sr_strerror(args->files->errnum));
        list_columns(args);
        bcf_sr_destroy(args->files);
        free(args);
        return 0;
    }

    if ( !args->format_str ) usage();
    args->out = args->fn_out ? fopen(args->fn_out, "w") : stdout;
    if ( !args->out ) error("%s: %s\n", args->fn_out,strerror(errno));

    if ( !args->vcf_list )
    {
        if ( !fname ) usage();
        args->files = bcf_sr_init();
        args->files->collapse = collapse;
        if ( optind+1 < argc ) args->files->require_index = 1;
        if ( args->regions_list && bcf_sr_set_regions(args->files, args->regions_list, regions_is_file)<0 )
            error("Failed to read the regions: %s\n", args->regions_list);
        if ( args->targets_list )
        {
            if ( bcf_sr_set_targets(args->files, args->targets_list, targets_is_file, 0)<0 )
                error("Failed to read the targets: %s\n", args->targets_list);
        }
        while ( fname )
        {
            if ( !bcf_sr_add_reader(args->files, fname) ) error("Failed to open %s: %s\n", fname,bcf_sr_strerror(args->files->errnum));
            fname = ++optind < argc ? argv[optind] : NULL;
        }
        init_data(args);
        query_vcf(args);
        free(args->format_str);
        destroy_data(args);
        bcf_sr_destroy(args->files);
        fclose(args->out);
        free(args);
        return 0;
    }

    // multiple VCFs
    int i, k, nfiles, prev_nsamples = 0;
    char **fnames, **prev_samples = NULL;
    fnames = hts_readlist(args->vcf_list, 1, &nfiles);
    if ( !nfiles ) error("No files in %s?\n", args->vcf_list);
    for (i=0; i<nfiles; i++)
    {
        args->files = bcf_sr_init();
        args->files->collapse = collapse;
        if ( args->regions_list && bcf_sr_set_regions(args->files, args->regions_list, regions_is_file)<0 )
            error("Failed to read the regions: %s\n", args->regions_list);
        if ( optind < argc ) args->files->require_index = 1;
        if ( args->targets_list )
        {
            if ( bcf_sr_set_targets(args->files, args->targets_list,targets_is_file, 0)<0 )
                error("Failed to read the targets: %s\n", args->targets_list);
        }
        if ( !bcf_sr_add_reader(args->files, fnames[i]) ) error("Failed to open %s: %s\n", fnames[i],bcf_sr_strerror(args->files->errnum));
        for (k=optind; k<argc; k++)
            if ( !bcf_sr_add_reader(args->files, argv[k]) ) error("Failed to open %s: %s\n", argv[k],bcf_sr_strerror(args->files->errnum));
        init_data(args);
        if ( i==0 )
            prev_samples = copy_header(args->header, args->files->readers[0].header->samples, bcf_hdr_nsamples(args->files->readers[0].header));
        else
        {
            args->print_header = 0;
            if ( compare_header(args->header, args->files->readers[0].header->samples, bcf_hdr_nsamples(args->files->readers[0].header), prev_samples, prev_nsamples) )
                error("Different samples in %s and %s\n", fnames[i-1],fnames[i]);
        }
        query_vcf(args);
        destroy_data(args);
        bcf_sr_destroy(args->files);
    }
    fclose(args->out);
    destroy_list(fnames, nfiles);
    destroy_list(prev_samples, prev_nsamples);
    free(args->format_str);
    free(args);
    return 0;
}
Exemplo n.º 11
0
   void set_target(const std::string &region) {
   if (bcf_sr_set_targets(m_sr, region.c_str(), false, 0) != 0)
     throw std::runtime_error("Could not set target: [" + region + "]");
   assert(m_sr->require_index == 0);
 }
Exemplo n.º 12
0
static void init_data(args_t *args)
{
    args->aux.srs = bcf_sr_init();

    // Open files for input and output, initialize structures
    if ( args->targets )
    {
        if ( bcf_sr_set_targets(args->aux.srs, args->targets, args->targets_is_file, args->aux.flag&CALL_CONSTR_ALLELES ? 3 : 0)<0 )
            error("Failed to read the targets: %s\n", args->targets);

        if ( args->aux.flag&CALL_CONSTR_ALLELES && args->flag&CF_INS_MISSED )
        {
            args->aux.srs->targets->missed_reg_handler = print_missed_line;
            args->aux.srs->targets->missed_reg_data = args;
        }
    }
    if ( args->regions )
    {
        if ( bcf_sr_set_regions(args->aux.srs, args->regions, args->regions_is_file)<0 )
            error("Failed to read the targets: %s\n", args->regions);
    }

    if ( !bcf_sr_add_reader(args->aux.srs, args->bcf_fname) ) error("Failed to open %s: %s\n", args->bcf_fname,bcf_sr_strerror(args->aux.srs->errnum));
    args->aux.hdr = bcf_sr_get_header(args->aux.srs,0);

    int i;
    if ( args->samples_fname )
    {
        set_samples(args, args->samples_fname, args->samples_is_file);
        if ( args->aux.flag&CALL_CONSTR_TRIO )
        {
            if ( 3*args->aux.nfams!=args->nsamples ) error("Expected only trios in %s, sorry!\n", args->samples_fname);
            fprintf(stderr,"Detected %d samples in %d trio families\n", args->nsamples,args->aux.nfams);
        }
        args->nsex = ploidy_nsex(args->ploidy);
        args->sex2ploidy = (int*) calloc(args->nsex,sizeof(int));
        args->sex2ploidy_prev = (int*) calloc(args->nsex,sizeof(int));
        args->aux.ploidy = (uint8_t*) malloc(args->nsamples);
        for (i=0; i<args->nsamples; i++) args->aux.ploidy[i] = 2;
        for (i=0; i<args->nsex; i++) args->sex2ploidy_prev[i] = 2;
    }

    if ( args->samples_map )
    {
        args->aux.hdr = bcf_hdr_subset(bcf_sr_get_header(args->aux.srs,0), args->nsamples, args->samples, args->samples_map);
        if ( !args->aux.hdr ) error("Error occurred while subsetting samples\n");
        for (i=0; i<args->nsamples; i++)
            if ( args->samples_map[i]<0 ) error("No such sample: %s\n", args->samples[i]);
        if ( !bcf_hdr_nsamples(args->aux.hdr) ) error("No matching sample found\n");
    }
    else
    {
        args->aux.hdr = bcf_hdr_dup(bcf_sr_get_header(args->aux.srs,0));
        for (i=0; i<args->nsamples; i++)
            if ( bcf_hdr_id2int(args->aux.hdr,BCF_DT_SAMPLE,args->samples[i])<0 )
                error("No such sample: %s\n", args->samples[i]);
    }

    args->out_fh = hts_open(args->output_fname, hts_bcf_wmode(args->output_type));
    if ( args->out_fh == NULL ) error("Can't write to \"%s\": %s\n", args->output_fname, strerror(errno));

    if ( args->flag & CF_QCALL )
        return;

    if ( args->flag & CF_MCALL )
        mcall_init(&args->aux);

    if ( args->flag & CF_CCALL )
        ccall_init(&args->aux);

    if ( args->flag&CF_GVCF )
    {
        bcf_hdr_append(args->aux.hdr,"##INFO=<ID=END,Number=1,Type=Integer,Description=\"End position of the variant described in this record\">");
        args->gvcf.rid  = -1;
        args->gvcf.line = bcf_init1();
        args->gvcf.gt   = (int32_t*) malloc(2*sizeof(int32_t)*bcf_hdr_nsamples(args->aux.hdr));
        for (i=0; i<bcf_hdr_nsamples(args->aux.hdr); i++)
        {
            args->gvcf.gt[2*i+0] = bcf_gt_unphased(0);
            args->gvcf.gt[2*i+1] = bcf_gt_unphased(0);
        }
    }

    bcf_hdr_remove(args->aux.hdr, BCF_HL_INFO, "QS");
    bcf_hdr_remove(args->aux.hdr, BCF_HL_INFO, "I16");

    bcf_hdr_append_version(args->aux.hdr, args->argc, args->argv, "bcftools_call");
    bcf_hdr_write(args->out_fh, args->aux.hdr);

    if ( args->flag&CF_INS_MISSED ) init_missed_line(args);
}