コード例 #1
0
ファイル: vcfgtcheck.c プロジェクト: kgururaj/bcftools
int main_vcfgtcheck(int argc, char *argv[])
{
    int c;
    args_t *args = (args_t*) calloc(1,sizeof(args_t));
    args->files  = bcf_sr_init();
    args->argc   = argc;
    args->argv = argv;
    set_cwd(args);
    char *regions = NULL, *targets = NULL;
    int regions_is_file = 0, targets_is_file = 0;

    static struct option loptions[] =
    {
        {"GTs-only",1,0,'G'},
        {"all-sites",0,0,'a'},
        {"homs-only",0,0,'H'},
        {"help",0,0,'h'},
        {"genotypes",1,0,'g'},
        {"plot",1,0,'p'},
        {"target-sample",1,0,'S'},
        {"query-sample",1,0,'s'},
        {"regions",1,0,'r'},
        {"regions-file",1,0,'R'},
        {"targets",1,0,'t'},
        {"targets-file",1,0,'T'},
        {0,0,0,0}
    };
    while ((c = getopt_long(argc, argv, "hg:p:s:S:Hr:R:at:T:G:",loptions,NULL)) >= 0) {
        switch (c) {
        case 'G':
            args->no_PLs = atoi(optarg);
            break;
        case 'a':
            args->all_sites = 1;
            break;
        case 'H':
            args->hom_only = 1;
            break;
        case 'g':
            args->gt_fname = optarg;
            break;
        case 'p':
            args->plot = optarg;
            break;
        case 'S':
            args->target_sample = optarg;
            break;
        case 's':
            args->query_sample = optarg;
            break;
        case 'r':
            regions = optarg;
            break;
        case 'R':
            regions = optarg;
            regions_is_file = 1;
            break;
        case 't':
            targets = optarg;
            break;
        case 'T':
            targets = optarg;
            targets_is_file = 1;
            break;
        case 'h':
        case '?':
            usage();
        default:
            error("Unknown argument: %s\n", optarg);
        }
    }
    if ( argc==optind || argc>optind+1 )  usage();  // none or too many files given
    if ( !args->gt_fname ) args->cross_check = 1;   // no genotype file, run in cross-check mode
    else args->files->require_index = 1;
    if ( regions && bcf_sr_set_regions(args->files, regions, regions_is_file)<0 ) error("Failed to read the regions: %s\n", regions);
    if ( targets && bcf_sr_set_targets(args->files, targets, targets_is_file, 0)<0 ) error("Failed to read the targets: %s\n", targets);
    if ( !bcf_sr_add_reader(args->files, argv[optind]) ) error("Failed to open or the file not indexed: %s\n", argv[optind]);
    if ( args->gt_fname && !bcf_sr_add_reader(args->files, args->gt_fname) ) error("Failed to open or the file not indexed: %s\n", args->gt_fname);
    args->files->collapse = COLLAPSE_SNPS|COLLAPSE_INDELS;
    if ( args->plot ) args->plot = init_prefix(args->plot);
    init_data(args);
    if ( args->cross_check )
        cross_check_gts(args);
    else
        check_gt(args);
    destroy_data(args);
    bcf_sr_destroy(args->files);
    if (args->plot) free(args->plot);
    free(args);
    return 0;
}
コード例 #2
0
ファイル: vcfroh.c.pysam.c プロジェクト: anykine/pysam
int main_vcfroh(int argc, char *argv[])
{
    int c;
    args_t *args  = (args_t*) calloc(1,sizeof(args_t));
    args->argc    = argc; args->argv = argv;
    args->files   = bcf_sr_init();
    args->t2AZ    = 6.7e-8;
    args->t2HW    = 5e-9;
    args->rec_rate = 0;
    int regions_is_file = 0, targets_is_file = 0;

    static struct option loptions[] =
    {
        {"AF-tag",1,0,0},
        {"AF-file",1,0,1},
        {"AF-dflt",1,0,2},
        {"estimate-AF",1,0,'e'},
        {"GTs-only",1,0,'G'},
        {"sample",1,0,'s'},
        {"hw-to-az",1,0,'a'},
        {"az-to-hw",1,0,'H'},
        {"viterbi-training",0,0,'V'},
        {"targets",1,0,'t'},
        {"targets-file",1,0,'T'},
        {"regions",1,0,'r'},
        {"regions-file",1,0,'R'},
        {"genetic-map",1,0,'m'},
        {"rec-rate",1,0,'M'},
        {"skip-indels",0,0,'I'},
        {0,0,0,0}
    };

    int naf_opts = 0;
    char *tmp;
    while ((c = getopt_long(argc, argv, "h?r:R:t:T:H:a:s:m:M:G:Ia:e:V",loptions,NULL)) >= 0) {
        switch (c) {
            case 0: args->af_tag = optarg; naf_opts++; break;
            case 1: args->af_fname = optarg; naf_opts++; break;
            case 2: 
                args->dflt_AF = strtod(optarg,&tmp);
                if ( *tmp ) error("Could not parse: --AF-dflt %s\n", optarg);
                break;
            case 'e': args->estimate_AF = optarg; naf_opts++; break;
            case 'I': args->snps_only = 1; break;
            case 'G':
                args->fake_PLs = 1; 
                args->unseen_PL = strtod(optarg,&tmp);
                if ( *tmp ) error("Could not parse: -G %s\n", optarg);
                args->unseen_PL = pow(10,-args->unseen_PL/10.); 
                break;
            case 'm': args->genmap_fname = optarg; break;
            case 'M':
                args->rec_rate = strtod(optarg,&tmp);
                if ( *tmp ) error("Could not parse: -M %s\n", optarg);
                break;
            case 's': args->sample = strdup(optarg); break;
            case 'a':
                args->t2AZ = strtod(optarg,&tmp);
                if ( *tmp ) error("Could not parse: -a %s\n", optarg);
                break;
            case 'H':
                args->t2HW = strtod(optarg,&tmp);
                if ( *tmp ) error("Could not parse: -H %s\n", optarg);
                break;
            case 't': args->targets_list = optarg; break;
            case 'T': args->targets_list = optarg; targets_is_file = 1; break;
            case 'r': args->regions_list = optarg; break;
            case 'R': args->regions_list = optarg; regions_is_file = 1; break;
            case 'V': args->vi_training = 1; break;
            case 'h': 
            case '?': usage(args); break;
            default: error("Unknown argument: %s\n", optarg);
        }
    }

    if ( argc<optind+1 ) usage(args);
    if ( args->t2AZ<0 || args->t2AZ>1 ) error("Error: The parameter --hw-to-az is not in [0,1]\n", args->t2AZ);
    if ( args->t2HW<0 || args->t2HW>1 ) error("Error: The parameter --az-to-hw is not in [0,1]\n", args->t2HW);
    if ( naf_opts>1 ) error("Error: The options --AF-tag, --AF-file and -e are mutually exclusive\n");
    if ( args->af_fname && args->targets_list ) error("Error: The options --AF-file and -t are mutually exclusive\n");
    if ( args->regions_list )
    {
        if ( bcf_sr_set_regions(args->files, args->regions_list, regions_is_file)<0 )
            error("Failed to read the regions: %s\n", args->regions_list);
    }
    if ( args->targets_list )
    {
        if ( bcf_sr_set_targets(args->files, args->targets_list, targets_is_file, 0)<0 )
            error("Failed to read the targets: %s\n", args->targets_list);
    }
    if ( args->af_fname )
    {
        if ( bcf_sr_set_targets(args->files, args->af_fname, 1, 3)<0 )
            error("Failed to read the targets: %s\n", args->af_fname);
    }
    if ( !bcf_sr_add_reader(args->files, argv[optind]) ) error("Failed to open %s: %s\n", argv[optind],bcf_sr_strerror(args->files->errnum));

    init_data(args);
    while ( bcf_sr_next_line(args->files) )
    {
        vcfroh(args, args->files->readers[0].buffer[0]);
    }
    vcfroh(args, NULL);
    fprintf(pysamerr,"Number of lines: total/processed: %d/%d\n", args->ntot,args->nused);
    destroy_data(args);
    free(args);
    return 0;
}
コード例 #3
0
ファイル: vcfconcat.c プロジェクト: andrewjesaitis/bcftools
static void init_data(args_t *args)
{
    bcf1_t *line = NULL;

    // With phased concat, the chunks overlap and come in the right order.  To
    // avoid opening all files at once, store start positions to recognise need
    // for the next one. This way we can keep only two open chunks at once.
    if ( args->phased_concat )
    {
        args->start_pos = (int*) malloc(sizeof(int)*args->nfnames);
        line = bcf_init();
    }

    kstring_t str = {0,0,0};
    int i, prev_chrid = -1;
    for (i=0; i<args->nfnames; i++)
    {
        htsFile *fp = hts_open(args->fnames[i], "r"); if ( !fp ) error("Failed to open: %s\n", args->fnames[i]);
        bcf_hdr_t *hdr = bcf_hdr_read(fp); if ( !hdr ) error("Failed to parse header: %s\n", args->fnames[i]);
        if ( !args->out_hdr )
            args->out_hdr = bcf_hdr_dup(hdr);
        else
        {
            bcf_hdr_combine(args->out_hdr, hdr);

            if ( bcf_hdr_nsamples(hdr) != bcf_hdr_nsamples(args->out_hdr) )
                error("Different number of samples in %s. Perhaps \"bcftools merge\" is what you are looking for?\n", args->fnames[i]);

            int j;
            for (j=0; j<bcf_hdr_nsamples(hdr); j++)
                if ( strcmp(args->out_hdr->samples[j],hdr->samples[j]) )
                    error("Different sample names in %s. Perhaps \"bcftools merge\" is what you are looking for?\n", args->fnames[i]);
        }
        if ( args->phased_concat )
        {
            int ret = bcf_read(fp, hdr, line);
            if ( ret!=0 ) args->start_pos[i] = -2;  // empty file
            else
            {
                int chrid = bcf_hdr_id2int(args->out_hdr,BCF_DT_CTG,bcf_seqname(hdr,line));
                args->start_pos[i] = chrid==prev_chrid ? line->pos : -1;
                prev_chrid = chrid;
            }
        }
        bcf_hdr_destroy(hdr);
        hts_close(fp);
    }
    free(str.s);
    if ( line ) bcf_destroy(line);

    args->seen_seq = (int*) calloc(args->out_hdr->n[BCF_DT_CTG],sizeof(int));

    if ( args->phased_concat )
    {
        bcf_hdr_append(args->out_hdr,"##FORMAT=<ID=PQ,Number=1,Type=Integer,Description=\"Phasing Quality (bigger is better)\">");
        bcf_hdr_append(args->out_hdr,"##FORMAT=<ID=PS,Number=1,Type=Integer,Description=\"Phase Set\">");
    }
    bcf_hdr_append_version(args->out_hdr, args->argc, args->argv, "bcftools_concat");
    args->out_fh = hts_open(args->output_fname,hts_bcf_wmode(args->output_type));
    if ( args->out_fh == NULL ) error("Can't write to \"%s\": %s\n", args->output_fname, strerror(errno));

    bcf_hdr_write(args->out_fh, args->out_hdr);

    if ( args->allow_overlaps )
    {
        args->files = bcf_sr_init();
        args->files->require_index = 1;
        if ( args->regions_list )
        {
            if ( bcf_sr_set_regions(args->files, args->regions_list, args->regions_is_file)<0 )
                error("Failed to read the regions: %s\n", args->regions_list);
        }
        for (i=0; i<args->nfnames; i++)
            if ( !bcf_sr_add_reader(args->files,args->fnames[i]) ) error("Failed to open %s: %s\n", args->fnames[i],bcf_sr_strerror(args->files->errnum));
    }
    else if ( args->phased_concat )
    {
        // Remove empty files from the list
        int nok = 0;
        while (1)
        {
            while ( nok<args->nfnames && args->start_pos[nok]!=-2 ) nok++;
            if ( nok==args->nfnames ) break;

            i = nok;
            while ( i<args->nfnames && args->start_pos[i]==-2 ) i++;
            if ( i==args->nfnames ) break;

            int tmp = args->start_pos[nok]; args->start_pos[nok] = args->start_pos[i]; args->start_pos[i] = tmp;
            char *str = args->fnames[nok]; args->fnames[nok] = args->fnames[i]; args->fnames[i] = str;
        }
        for (i=nok; i<args->nfnames; i++) free(args->fnames[i]);
        args->nfnames = nok;

        for (i=1; i<args->nfnames; i++)
            if ( args->start_pos[i-1]!=-1 && args->start_pos[i]!=-1 && args->start_pos[i]<args->start_pos[i-1] )
                error("The files not in ascending order: %d in %s, %d in %s\n", args->start_pos[i-1]+1,args->fnames[i-1],args->start_pos[i]+1,args->fnames[i]);

        args->prev_chr = -1;
        args->swap_phase = (int*) calloc(bcf_hdr_nsamples(args->out_hdr),sizeof(int));
        args->nmatch = (int*) calloc(bcf_hdr_nsamples(args->out_hdr),sizeof(int));
        args->nmism  = (int*) calloc(bcf_hdr_nsamples(args->out_hdr),sizeof(int));
        args->phase_qual = (int32_t*) malloc(bcf_hdr_nsamples(args->out_hdr)*sizeof(int32_t));
        args->phase_set  = (int32_t*) malloc(bcf_hdr_nsamples(args->out_hdr)*sizeof(int32_t));
        args->files = bcf_sr_init();
        args->files->require_index = 1;
        args->ifname = 0;
    }
}
コード例 #4
0
void genrich_data::readReferenceGenotypes(string fvcf) {
	vector < int > mappingS;

	//Opening files
	vrb.title("Reading variant list in [" + fvcf + "] MAF=" + stb.str(threshold_maf));
	bcf_srs_t * sr =  bcf_sr_init();
	if(!(bcf_sr_add_reader (sr, fvcf.c_str()))) {
		switch (sr->errnum) {
		case not_bgzf: vrb.error("File not compressed with bgzip!");
		case idx_load_failed: vrb.error("Impossible to load index file!");
		case file_type_error: vrb.error("File format not detected by htslib!");
		default : vrb.error("Unknown error!");
		}
	}

	//Sample processing
	int included_sample = 0;
	int n_samples = bcf_hdr_nsamples(sr->readers[0].header);
	for (int i = 0 ; i < n_samples ; i ++) {
		mappingS.push_back(findSample(string(sr->readers[0].header->samples[i])));
		if (mappingS.back() >= 0) included_sample ++;
	}
	vrb.bullet("#samples = " + stb.str(included_sample));

	//Variant processing
	unsigned int n_excludedV_mult = 0, n_excludedV_void = 0, n_excludedV_rare = 0, n_excludedV_uchr = 0, n_line = 0, n_excludedV_toofar = 0;
	int ngt, ngt_arr = 0, *gt_arr = NULL;
	bcf1_t * line;
	while(bcf_sr_next_line (sr)) {
		line =  bcf_sr_get_line(sr, 0);
		if (line->n_allele == 2) {
			bcf_unpack(line, BCF_UN_STR);
			string sid = string(line->d.id);
			string chr = string(bcf_hdr_id2name(sr->readers[0].header, line->rid));
			int chr_idx = findCHR(chr);
			if (chr_idx >= 0) {
				unsigned int pos = line->pos + 1;
				ngt = bcf_get_genotypes(sr->readers[0].header, line, &gt_arr, &ngt_arr);
				if (ngt == 2*n_samples) {
					double freq = 0.0, tot = 0.0;
					for(int i = 0 ; i < n_samples ; i ++) {
						assert(gt_arr[2*i+0] != bcf_gt_missing && gt_arr[2*i+1] != bcf_gt_missing);
						if (mappingS[i] >= 0) {
							freq += bcf_gt_allele(gt_arr[2*i+0]) + bcf_gt_allele(gt_arr[2*i+1]);
							tot += 2.0;
						}
					}
					double maf = freq / tot;
					if (maf > 0.5) maf = 1.0 - maf;
					if (maf >= threshold_maf) {
						int dist_tss = getDistance(chr_idx, pos);
						if (dist_tss < 1e6) {
							string tmp_id = chr + "_" + stb.str(pos);
							genotype_uuid.insert(pair < string, unsigned int > (tmp_id, genotype_pos.size()));
							genotype_chr.push_back(chr_idx);
							genotype_pos.push_back(pos);
							genotype_maf.push_back(maf);
							genotype_dist.push_back(dist_tss);
							genotype_haps.push_back(vector < bool > (2 * included_sample, false));
							for(int i = 0 ; i < n_samples ; i ++) {
								if (mappingS[i] >= 0) {
									genotype_haps.back()[2 * mappingS[i] + 0] = bcf_gt_allele(gt_arr[2 * i + 0]);
									genotype_haps.back()[2 * mappingS[i] + 1] = bcf_gt_allele(gt_arr[2 * i + 1]);
								}
							}
						} else n_excludedV_toofar++;
					} else n_excludedV_rare ++;
				} else n_excludedV_void ++;
			} else n_excludedV_uchr ++;
		} else n_excludedV_mult ++;

		if (n_line % 100000 == 0) vrb.bullet("#lines = " + stb.str(n_line));

		n_line ++;
 	}
	genotype_qtl = vector < bool > (genotype_pos.size(), false);
	genotype_gwas = vector < bool > (genotype_pos.size(), false);
	genotype_bin = vector < int > (genotype_pos.size(), -1);

	//Finalize
	bcf_sr_destroy(sr);
	vrb.bullet(stb.str(genotype_pos.size()) + " variants included");
	if (n_excludedV_mult > 0) vrb.bullet(stb.str(n_excludedV_mult) + " multi-allelic variants excluded");
	if (n_excludedV_uchr > 0) vrb.bullet(stb.str(n_excludedV_uchr) + " variants with unreferenced chromosome in --tss");
	if (n_excludedV_rare > 0) vrb.bullet(stb.str(n_excludedV_rare) + " maf filtered variants");
	if (n_excludedV_toofar > 0) vrb.bullet(stb.str(n_excludedV_toofar) + " too far variants");
}
コード例 #5
0
ファイル: vcfisec.c プロジェクト: wtsi-hgi/bcftools
int main_vcfisec(int argc, char *argv[])
{
	int c;
	args_t *args = (args_t*) calloc(1,sizeof(args_t));
    args->files  = bcf_sr_init();
	args->argc   = argc; args->argv = argv;
    args->output_type = FT_VCF;

	static struct option loptions[] = 
	{
		{"help",0,0,'h'},
		{"collapse",1,0,'c'},
		{"complement",1,0,'C'},
		{"apply-filters",1,0,'f'},
		{"nfiles",1,0,'n'},
		{"prefix",1,0,'p'},
		{"write",1,0,'w'},
		{"targets",1,0,'t'},
		{"regions",1,0,'r'},
		{"output-type",1,0,'O'},
		{0,0,0,0}
	};
	while ((c = getopt_long(argc, argv, "hc:r:p:n:w:t:Cf:O:",loptions,NULL)) >= 0) {
		switch (c) {
			case 'O': 
                switch (optarg[0]) {
                    case 'b': args->output_type = FT_BCF_GZ; break;
                    case 'u': args->output_type = FT_BCF; break;
                    case 'z': args->output_type = FT_VCF_GZ; break;
                    case 'v': args->output_type = FT_VCF; break;
                    default: error("The output type \"%s\" not recognised\n", optarg);
                }
                break;
			case 'c':
				if ( !strcmp(optarg,"snps") ) args->files->collapse |= COLLAPSE_SNPS;
				else if ( !strcmp(optarg,"indels") ) args->files->collapse |= COLLAPSE_INDELS;
				else if ( !strcmp(optarg,"both") ) args->files->collapse |= COLLAPSE_SNPS | COLLAPSE_INDELS;
				else if ( !strcmp(optarg,"any") ) args->files->collapse |= COLLAPSE_ANY;
				else if ( !strcmp(optarg,"all") ) args->files->collapse |= COLLAPSE_ANY;
				else if ( !strcmp(optarg,"some") ) args->files->collapse |= COLLAPSE_SOME;
				else if ( !strcmp(optarg,"none") ) args->files->collapse = COLLAPSE_NONE;
                else error("The --collapse string \"%s\" not recognised.\n", optarg);
				break;
			case 'f': args->files->apply_filters = optarg; break;
			case 'C': args->isec_op = OP_COMPLEMENT; break;
			case 'r': args->regions_fname = optarg; break;
			case 't': args->targets_fname = optarg; break;
			case 'p': args->prefix = optarg; break;
			case 'w': args->write_files = optarg; break;
			case 'n': 
                {
                    char *p = optarg;
                    if ( *p=='-' ) { args->isec_op = OP_MINUS; p++; }
                    else if ( *p=='+' ) { args->isec_op = OP_PLUS; p++; }
                    else if ( *p=='=' ) { args->isec_op = OP_EQUAL; p++; }
                    else if ( isdigit(*p) ) args->isec_op = OP_EQUAL;
                    else error("Could not parse --nfiles %s\n", optarg);
                    if ( sscanf(p,"%d",&args->isec_n)!=1 ) error("Could not parse --nfiles %s\n", optarg);
                }
                break;
			case 'h': 
			case '?': usage();
			default: error("Unknown argument: %s\n", optarg);
		}
	}
	if ( argc-optind<1 ) usage();   // no file given
    if ( args->targets_fname && bcf_sr_set_targets(args->files, args->targets_fname,0)<0 )
        error("Failed to read the targets: %s\n", args->targets_fname);
    if ( args->regions_fname && bcf_sr_set_regions(args->files, args->regions_fname)<0 )
        error("Failed to read the regions: %s\n", args->regions_fname);
    if ( argc-optind==2 && !args->isec_op ) 
    {
        args->isec_op = OP_VENN;
        if ( !args->prefix ) error("Expected the -p option\n");
    }
    if ( !args->targets_fname )
    {
        if ( argc-optind<2  ) error("Expected multiple files or the --targets option\n");
        if ( !args->isec_op ) error("Expected two file names or one of the options --complement, --nfiles or --targets\n");
    }
    args->files->require_index = 1;
	while (optind<argc)
	{
		if ( !bcf_sr_add_reader(args->files, argv[optind]) ) error("Failed to open: %s\n", argv[optind]);
		optind++;
	}
    init_data(args);
	isec_vcf(args);
    destroy_data(args);
	bcf_sr_destroy(args->files);
	free(args);
	return 0;
}
コード例 #6
0
ファイル: vcfquery.c プロジェクト: msto/pysam
int main_vcfquery(int argc, char *argv[])
{
    int c, collapse = 0;
    args_t *args = (args_t*) calloc(1,sizeof(args_t));
    args->argc   = argc; args->argv = argv;
    int regions_is_file = 0, targets_is_file = 0;

    static struct option loptions[] =
    {
        {"help",0,0,'h'},
        {"list-samples",0,0,'l'},
        {"include",1,0,'i'},
        {"exclude",1,0,'e'},
        {"format",1,0,'f'},
        {"output-file",1,0,'o'},
        {"regions",1,0,'r'},
        {"regions-file",1,0,'R'},
        {"targets",1,0,'t'},
        {"targets-file",1,0,'T'},
        {"annots",1,0,'a'},
        {"samples",1,0,'s'},
        {"samples-file",1,0,'S'},
        {"print-header",0,0,'H'},
        {"collapse",1,0,'c'},
        {"vcf-list",1,0,'v'},
        {"allow-undef-tags",0,0,'u'},
        {0,0,0,0}
    };
    while ((c = getopt_long(argc, argv, "hlr:R:f:a:s:S:Ht:T:c:v:i:e:o:u",loptions,NULL)) >= 0) {
        switch (c) {
            case 'o': args->fn_out = optarg; break;
            case 'f': args->format_str = strdup(optarg); break;
            case 'H': args->print_header = 1; break;
            case 'v': args->vcf_list = optarg; break;
            case 'c':
                if ( !strcmp(optarg,"snps") ) collapse |= COLLAPSE_SNPS;
                else if ( !strcmp(optarg,"indels") ) collapse |= COLLAPSE_INDELS;
                else if ( !strcmp(optarg,"both") ) collapse |= COLLAPSE_SNPS | COLLAPSE_INDELS;
                else if ( !strcmp(optarg,"any") ) collapse |= COLLAPSE_ANY;
                else if ( !strcmp(optarg,"all") ) collapse |= COLLAPSE_ANY;
                else if ( !strcmp(optarg,"some") ) collapse |= COLLAPSE_SOME;
                else error("The --collapse string \"%s\" not recognised.\n", optarg);
                break;
            case 'a':
                {
                    kstring_t str = {0,0,0};
                    kputs("%CHROM\t%POS\t%MASK\t%REF\t%ALT\t%", &str);
                    char *p = optarg;
                    while ( *p )
                    {
                        if ( *p==',' )
                            kputs("\t%", &str);
                        else
                            kputc(*p, &str);
                        p++;
                    }
                    kputc('\n', &str);
                    args->format_str = str.s;
                    break;
                }
            case 'e': args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
            case 'i': args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
            case 'r': args->regions_list = optarg; break;
            case 'R': args->regions_list = optarg; regions_is_file = 1; break;
            case 't': args->targets_list = optarg; break;
            case 'T': args->targets_list = optarg; targets_is_file = 1; break;
            case 'l': args->list_columns = 1; break;
            case 'u': args->allow_undef_tags = 1; break;
            case 's': args->sample_list = optarg; break;
            case 'S': args->sample_list = optarg; args->sample_is_file = 1; break;
            case 'h':
            case '?': usage();
            default: error("Unknown argument: %s\n", optarg);
        }
    }

    char *fname = NULL;
    if ( optind>=argc )
    {
        if ( !isatty(fileno((FILE *)stdin)) ) fname = "-";
    }
    else fname = argv[optind];

    if ( args->list_columns )
    {
        if ( !fname ) error("Missing the VCF file name\n");
        args->files = bcf_sr_init();
        if ( !bcf_sr_add_reader(args->files, fname) ) error("Failed to open %s: %s\n", fname,bcf_sr_strerror(args->files->errnum));
        list_columns(args);
        bcf_sr_destroy(args->files);
        free(args);
        return 0;
    }

    if ( !args->format_str ) usage();
    args->out = args->fn_out ? fopen(args->fn_out, "w") : stdout;
    if ( !args->out ) error("%s: %s\n", args->fn_out,strerror(errno));

    if ( !args->vcf_list )
    {
        if ( !fname ) usage();
        args->files = bcf_sr_init();
        args->files->collapse = collapse;
        if ( optind+1 < argc ) args->files->require_index = 1;
        if ( args->regions_list && bcf_sr_set_regions(args->files, args->regions_list, regions_is_file)<0 )
            error("Failed to read the regions: %s\n", args->regions_list);
        if ( args->targets_list )
        {
            if ( bcf_sr_set_targets(args->files, args->targets_list, targets_is_file, 0)<0 )
                error("Failed to read the targets: %s\n", args->targets_list);
        }
        while ( fname )
        {
            if ( !bcf_sr_add_reader(args->files, fname) ) error("Failed to open %s: %s\n", fname,bcf_sr_strerror(args->files->errnum));
            fname = ++optind < argc ? argv[optind] : NULL;
        }
        init_data(args);
        query_vcf(args);
        free(args->format_str);
        destroy_data(args);
        bcf_sr_destroy(args->files);
        fclose(args->out);
        free(args);
        return 0;
    }

    // multiple VCFs
    int i, k, nfiles, prev_nsamples = 0;
    char **fnames, **prev_samples = NULL;
    fnames = hts_readlist(args->vcf_list, 1, &nfiles);
    if ( !nfiles ) error("No files in %s?\n", args->vcf_list);
    for (i=0; i<nfiles; i++)
    {
        args->files = bcf_sr_init();
        args->files->collapse = collapse;
        if ( args->regions_list && bcf_sr_set_regions(args->files, args->regions_list, regions_is_file)<0 )
            error("Failed to read the regions: %s\n", args->regions_list);
        if ( optind < argc ) args->files->require_index = 1;
        if ( args->targets_list )
        {
            if ( bcf_sr_set_targets(args->files, args->targets_list,targets_is_file, 0)<0 )
                error("Failed to read the targets: %s\n", args->targets_list);
        }
        if ( !bcf_sr_add_reader(args->files, fnames[i]) ) error("Failed to open %s: %s\n", fnames[i],bcf_sr_strerror(args->files->errnum));
        for (k=optind; k<argc; k++)
            if ( !bcf_sr_add_reader(args->files, argv[k]) ) error("Failed to open %s: %s\n", argv[k],bcf_sr_strerror(args->files->errnum));
        init_data(args);
        if ( i==0 )
            prev_samples = copy_header(args->header, args->files->readers[0].header->samples, bcf_hdr_nsamples(args->files->readers[0].header));
        else
        {
            args->print_header = 0;
            if ( compare_header(args->header, args->files->readers[0].header->samples, bcf_hdr_nsamples(args->files->readers[0].header), prev_samples, prev_nsamples) )
                error("Different samples in %s and %s\n", fnames[i-1],fnames[i]);
        }
        query_vcf(args);
        destroy_data(args);
        bcf_sr_destroy(args->files);
    }
    fclose(args->out);
    destroy_list(fnames, nfiles);
    destroy_list(prev_samples, prev_nsamples);
    free(args->format_str);
    free(args);
    return 0;
}
コード例 #7
0
ファイル: vcfgtcheck.c プロジェクト: samtools/bcftools
int main_vcfgtcheck(int argc, char *argv[])
{
    int c;
    args_t *args = (args_t*) calloc(1,sizeof(args_t));
    args->files  = bcf_sr_init();
    args->argc   = argc; args->argv = argv; set_cwd(args);
    char *regions = NULL, *targets = NULL;
    int regions_is_file = 0, targets_is_file = 0;

    // In simulated sample swaps the minimum error was 0.3 and maximum intra-sample error was 0.23
    //    - min_inter: pairs with smaller err value will be considered identical 
    //    - max_intra: pairs with err value bigger than abs(max_intra_err) will be considered
    //                  different. If negative, the cutoff may be heuristically lowered
    args->min_inter_err =  0.23;
    args->max_intra_err = -0.3;

    static struct option loptions[] =
    {
        {"cluster",1,0,'c'},
        {"GTs-only",1,0,'G'},
        {"all-sites",0,0,'a'},
        {"homs-only",0,0,'H'},
        {"help",0,0,'h'},
        {"genotypes",1,0,'g'},
        {"plot",1,0,'p'},
        {"target-sample",1,0,'S'},
        {"query-sample",1,0,'s'},
        {"regions",1,0,'r'},
        {"regions-file",1,0,'R'},
        {"targets",1,0,'t'},
        {"targets-file",1,0,'T'},
        {0,0,0,0}
    };
    char *tmp;
    while ((c = getopt_long(argc, argv, "hg:p:s:S:Hr:R:at:T:G:c:",loptions,NULL)) >= 0) {
        switch (c) {
            case 'c':
                args->min_inter_err = strtod(optarg,&tmp);
                if ( *tmp )
                {
                    if ( *tmp!=',') error("Could not parse: -c %s\n", optarg);
                    args->max_intra_err = strtod(tmp+1,&tmp);
                    if ( *tmp ) error("Could not parse: -c %s\n", optarg);
                }
                break;
            case 'G':
                args->no_PLs = strtol(optarg,&tmp,10);
                if ( *tmp ) error("Could not parse argument: --GTs-only %s\n", optarg);
                break;
            case 'a': args->all_sites = 1; break;
            case 'H': args->hom_only = 1; break;
            case 'g': args->gt_fname = optarg; break;
            case 'p': args->plot = optarg; break;
            case 'S': args->target_sample = optarg; break;
            case 's': args->query_sample = optarg; break;
            case 'r': regions = optarg; break;
            case 'R': regions = optarg; regions_is_file = 1; break;
            case 't': targets = optarg; break;
            case 'T': targets = optarg; targets_is_file = 1; break;
            case 'h':
            case '?': usage();
            default: error("Unknown argument: %s\n", optarg);
        }
    }
    char *fname = NULL;
    if ( optind==argc )
    {
        if ( !isatty(fileno((FILE *)stdin)) ) fname = "-";  // reading from stdin
        else usage();   // no files given
    }
    else fname = argv[optind];
    if ( argc>optind+1 )  usage();  // too many files given
    if ( !args->gt_fname ) args->cross_check = 1;   // no genotype file, run in cross-check mode
    else args->files->require_index = 1;
    if ( regions && bcf_sr_set_regions(args->files, regions, regions_is_file)<0 ) error("Failed to read the regions: %s\n", regions);
    if ( targets && bcf_sr_set_targets(args->files, targets, targets_is_file, 0)<0 ) error("Failed to read the targets: %s\n", targets);
    if ( !bcf_sr_add_reader(args->files, fname) ) error("Failed to open %s: %s\n", fname,bcf_sr_strerror(args->files->errnum));
    if ( args->gt_fname && !bcf_sr_add_reader(args->files, args->gt_fname) ) error("Failed to open %s: %s\n", args->gt_fname,bcf_sr_strerror(args->files->errnum));
    args->files->collapse = COLLAPSE_SNPS|COLLAPSE_INDELS;
    if ( args->plot ) args->plot = init_prefix(args->plot);
    init_data(args);
    if ( args->cross_check )
        cross_check_gts(args);
    else
        check_gt(args);
    destroy_data(args);
    bcf_sr_destroy(args->files);
    if (args->plot) free(args->plot);
    free(args);
    return 0;
}
コード例 #8
0
ファイル: vcf2sex.c プロジェクト: MMesbahU/bcftools
int run(int argc, char **argv)
{
    args_t *args  = (args_t*) calloc(1,sizeof(args_t));
    args->nsites = 10;
    args->min_hets = 0.3;
    args->background = "X:60001-2699520";
    static struct option loptions[] =
    {
        {"verbose",1,0,'v'},
        {"ploidy",1,0,'p'},
        {"nsites",1,0,'n'},
        {"guess",1,0,'g'},
        {"min-hets",1,0,'m'},
        {"background",1,0,'b'},
        {0,0,0,0}
    };
    char c, *tmp, *ploidy_fname = NULL;
    while ((c = getopt_long(argc, argv, "p:n:g:m:vb:",loptions,NULL)) >= 0)
    {
        switch (c) {
            case 'b': 
                if ( !strcmp("-",optarg) ) args->background = NULL;
                else args->background = optarg; 
                break; 
            case 'v': args->verbose = 1; break; 
            case 'g':
                if ( !strcasecmp(optarg,"GT") ) args->guess = GUESS_GT;
                else if ( !strcasecmp(optarg,"PL") ) args->guess = GUESS_PL;
                else if ( !strcasecmp(optarg,"GL") ) args->guess = GUESS_GL;
                else error("The argument not recognised, expected --guess GT, --guess PL or --guess GL: %s\n", optarg);
                break;
            case 'm': 
                args->min_hets = strtod(optarg,&tmp); 
                if ( *tmp ) error("Unexpected argument to --min-hets: %s\n", optarg);
                break; 
            case 'p': ploidy_fname = optarg; break; 
            case 'n': 
                args->nsites = strtol(optarg,&tmp,10); 
                if (*tmp) error("Unexpected argument to --nsites: %s\n", optarg); break; 
            case 'h':
            case '?':
            default: error("%s", usage()); break;
        }
    }

    args->sr = bcf_sr_init();
    args->sr->require_index = 1;
    if ( !argv[0] ) error("%s", usage());
    if ( !bcf_sr_add_reader(args->sr,argv[0]) ) error("Error: %s\n", bcf_sr_strerror(args->sr->errnum));
    args->hdr = args->sr->readers[0].header;
    args->nsample = bcf_hdr_nsamples(args->hdr);
 
    args->dflt_ploidy = 2;
    if ( ploidy_fname )
    {
        args->ploidy = ploidy_init(ploidy_fname, args->dflt_ploidy);
        if ( !args->ploidy ) error("Could not read %s\n", ploidy_fname);
    }
    else
    {
        args->ploidy = ploidy_init_string(
                "X 1 60000 M 1\n"
                "X 2699521 154931043 M 1\n"
                "Y 1 59373566 M 1\n"
                "Y 1 59373566 F 0\n", args->dflt_ploidy);
    }
    args->nsex = ploidy_nsex(args->ploidy);
    args->sex2ploidy = (int*) malloc(sizeof(int)*args->nsex);
    args->max_ploidy = ploidy_max(args->ploidy);
    if ( args->guess && args->max_ploidy > 2 ) error("Sorry, ploidy %d not supported with -g\n", args->max_ploidy);
    args->ncounts = args->nsample * ((args->max_ploidy>2 ? args->max_ploidy : 2)+1);
    args->counts = (int*) malloc(sizeof(int)*args->ncounts);
    args->bg_counts = (count_t*) calloc(args->nsample,sizeof(count_t));
    args->sex2prob = (float*) calloc(args->nsample*args->nsex,sizeof(float));

    int i, nseq;
    for (i=0; i<args->nsample*args->nsex; i++) args->sex2prob[i] = 1;

    if ( args->verbose && args->guess )
        printf("# [1]REG\t[2]Region\t[3]Sample\t[4]Het fraction\t[5]nHet\t[6]nHom\t[7]nMissing\n");

    // First get the counts from expected haploid regions
    regidx_t *idx = ploidy_regions(args->ploidy);
    char **seqs = regidx_seq_names(idx, &nseq);
    for (i=0; i<nseq; i++)
    {
        regitr_t itr;
        regidx_overlap(idx, seqs[i], 0, UINT32_MAX, &itr);
        while ( itr.i < itr.n )
        {
            if ( args->guess )
                itr.i += process_region_guess(args, seqs[i], &itr);
            else
                itr.i += process_region_precise(args, seqs[i], &itr);
        }
    }
    // Get the counts from a PAR (the background diploid region) and see if the fraction
    // of hets is different
    if ( args->guess ) sex2prob_guess(args);

    for (i=0; i<args->nsample; i++)
    {
        int j, jmax = 0;
        float max = 0, sum = 0;
        for (j=0; j<args->nsex; j++)
        {
            sum += args->sex2prob[i*args->nsex+j];
            if ( max < args->sex2prob[i*args->nsex+j] )
            {
                jmax = j;
                max = args->sex2prob[i*args->nsex+j];
            }
        }
        if ( args->verbose )
            printf("%s\t%s\t%f\n", args->hdr->samples[i],ploidy_id2sex(args->ploidy,jmax),args->sex2prob[i*args->nsex+jmax]/sum);
        else
            printf("%s\t%s\n", args->hdr->samples[i],ploidy_id2sex(args->ploidy,jmax));
    }
   
    bcf_sr_destroy(args->sr);
    ploidy_destroy(args->ploidy);
    destroy_regs(args);
    free(args->sex2ploidy);
    free(args->counts);
    free(args->bg_counts);
    free(args->gts);
    free(args->pls);
    free(args->sex2prob);
    free(args);
    return 0;
}
コード例 #9
0
 bcf_srs() { m_sr = bcf_sr_init(); }
コード例 #10
0
ファイル: vcfcall.c プロジェクト: humanlongevity/bcftools
static void init_data(args_t *args)
{
    args->aux.srs = bcf_sr_init();

    // Open files for input and output, initialize structures
    if ( args->targets )
    {
        if ( bcf_sr_set_targets(args->aux.srs, args->targets, args->targets_is_file, args->aux.flag&CALL_CONSTR_ALLELES ? 3 : 0)<0 )
            error("Failed to read the targets: %s\n", args->targets);

        if ( args->aux.flag&CALL_CONSTR_ALLELES && args->flag&CF_INS_MISSED )
        {
            args->aux.srs->targets->missed_reg_handler = print_missed_line;
            args->aux.srs->targets->missed_reg_data = args;
        }
    }
    if ( args->regions )
    {
        if ( bcf_sr_set_regions(args->aux.srs, args->regions, args->regions_is_file)<0 )
            error("Failed to read the targets: %s\n", args->regions);
    }

    if ( !bcf_sr_add_reader(args->aux.srs, args->bcf_fname) ) error("Failed to open %s: %s\n", args->bcf_fname,bcf_sr_strerror(args->aux.srs->errnum));
    args->aux.hdr = bcf_sr_get_header(args->aux.srs,0);

    int i;
    if ( args->samples_fname )
    {
        set_samples(args, args->samples_fname, args->samples_is_file);
        if ( args->aux.flag&CALL_CONSTR_TRIO )
        {
            if ( 3*args->aux.nfams!=args->nsamples ) error("Expected only trios in %s, sorry!\n", args->samples_fname);
            fprintf(stderr,"Detected %d samples in %d trio families\n", args->nsamples,args->aux.nfams);
        }
        args->nsex = ploidy_nsex(args->ploidy);
        args->sex2ploidy = (int*) calloc(args->nsex,sizeof(int));
        args->sex2ploidy_prev = (int*) calloc(args->nsex,sizeof(int));
        args->aux.ploidy = (uint8_t*) malloc(args->nsamples);
        for (i=0; i<args->nsamples; i++) args->aux.ploidy[i] = 2;
        for (i=0; i<args->nsex; i++) args->sex2ploidy_prev[i] = 2;
    }

    if ( args->samples_map )
    {
        args->aux.hdr = bcf_hdr_subset(bcf_sr_get_header(args->aux.srs,0), args->nsamples, args->samples, args->samples_map);
        if ( !args->aux.hdr ) error("Error occurred while subsetting samples\n");
        for (i=0; i<args->nsamples; i++)
            if ( args->samples_map[i]<0 ) error("No such sample: %s\n", args->samples[i]);
        if ( !bcf_hdr_nsamples(args->aux.hdr) ) error("No matching sample found\n");
    }
    else
    {
        args->aux.hdr = bcf_hdr_dup(bcf_sr_get_header(args->aux.srs,0));
        for (i=0; i<args->nsamples; i++)
            if ( bcf_hdr_id2int(args->aux.hdr,BCF_DT_SAMPLE,args->samples[i])<0 )
                error("No such sample: %s\n", args->samples[i]);
    }

    args->out_fh = hts_open(args->output_fname, hts_bcf_wmode(args->output_type));
    if ( args->out_fh == NULL ) error("Can't write to \"%s\": %s\n", args->output_fname, strerror(errno));

    if ( args->flag & CF_QCALL )
        return;

    if ( args->flag & CF_MCALL )
        mcall_init(&args->aux);

    if ( args->flag & CF_CCALL )
        ccall_init(&args->aux);

    if ( args->flag&CF_GVCF )
    {
        bcf_hdr_append(args->aux.hdr,"##INFO=<ID=END,Number=1,Type=Integer,Description=\"End position of the variant described in this record\">");
        args->gvcf.rid  = -1;
        args->gvcf.line = bcf_init1();
        args->gvcf.gt   = (int32_t*) malloc(2*sizeof(int32_t)*bcf_hdr_nsamples(args->aux.hdr));
        for (i=0; i<bcf_hdr_nsamples(args->aux.hdr); i++)
        {
            args->gvcf.gt[2*i+0] = bcf_gt_unphased(0);
            args->gvcf.gt[2*i+1] = bcf_gt_unphased(0);
        }
    }

    bcf_hdr_remove(args->aux.hdr, BCF_HL_INFO, "QS");
    bcf_hdr_remove(args->aux.hdr, BCF_HL_INFO, "I16");

    bcf_hdr_append_version(args->aux.hdr, args->argc, args->argv, "bcftools_call");
    bcf_hdr_write(args->out_fh, args->aux.hdr);

    if ( args->flag&CF_INS_MISSED ) init_missed_line(args);
}