Exemplo n.º 1
0
int run(int argc, char **argv)
{
    args_t *args  = (args_t*) calloc(1,sizeof(args_t));
    args->nsites = 10;
    args->min_hets = 0.3;
    args->background = "X:60001-2699520";
    static struct option loptions[] =
    {
        {"verbose",1,0,'v'},
        {"ploidy",1,0,'p'},
        {"nsites",1,0,'n'},
        {"guess",1,0,'g'},
        {"min-hets",1,0,'m'},
        {"background",1,0,'b'},
        {0,0,0,0}
    };
    char c, *tmp, *ploidy_fname = NULL;
    while ((c = getopt_long(argc, argv, "p:n:g:m:vb:",loptions,NULL)) >= 0)
    {
        switch (c) {
            case 'b': 
                if ( !strcmp("-",optarg) ) args->background = NULL;
                else args->background = optarg; 
                break; 
            case 'v': args->verbose = 1; break; 
            case 'g':
                if ( !strcasecmp(optarg,"GT") ) args->guess = GUESS_GT;
                else if ( !strcasecmp(optarg,"PL") ) args->guess = GUESS_PL;
                else if ( !strcasecmp(optarg,"GL") ) args->guess = GUESS_GL;
                else error("The argument not recognised, expected --guess GT, --guess PL or --guess GL: %s\n", optarg);
                break;
            case 'm': 
                args->min_hets = strtod(optarg,&tmp); 
                if ( *tmp ) error("Unexpected argument to --min-hets: %s\n", optarg);
                break; 
            case 'p': ploidy_fname = optarg; break; 
            case 'n': 
                args->nsites = strtol(optarg,&tmp,10); 
                if (*tmp) error("Unexpected argument to --nsites: %s\n", optarg); break; 
            case 'h':
            case '?':
            default: error("%s", usage()); break;
        }
    }

    args->sr = bcf_sr_init();
    args->sr->require_index = 1;
    if ( !argv[0] ) error("%s", usage());
    if ( !bcf_sr_add_reader(args->sr,argv[0]) ) error("Error: %s\n", bcf_sr_strerror(args->sr->errnum));
    args->hdr = args->sr->readers[0].header;
    args->nsample = bcf_hdr_nsamples(args->hdr);
 
    args->dflt_ploidy = 2;
    if ( ploidy_fname )
    {
        args->ploidy = ploidy_init(ploidy_fname, args->dflt_ploidy);
        if ( !args->ploidy ) error("Could not read %s\n", ploidy_fname);
    }
    else
    {
        args->ploidy = ploidy_init_string(
                "X 1 60000 M 1\n"
                "X 2699521 154931043 M 1\n"
                "Y 1 59373566 M 1\n"
                "Y 1 59373566 F 0\n", args->dflt_ploidy);
    }
    args->nsex = ploidy_nsex(args->ploidy);
    args->sex2ploidy = (int*) malloc(sizeof(int)*args->nsex);
    args->max_ploidy = ploidy_max(args->ploidy);
    if ( args->guess && args->max_ploidy > 2 ) error("Sorry, ploidy %d not supported with -g\n", args->max_ploidy);
    args->ncounts = args->nsample * ((args->max_ploidy>2 ? args->max_ploidy : 2)+1);
    args->counts = (int*) malloc(sizeof(int)*args->ncounts);
    args->bg_counts = (count_t*) calloc(args->nsample,sizeof(count_t));
    args->sex2prob = (float*) calloc(args->nsample*args->nsex,sizeof(float));

    int i, nseq;
    for (i=0; i<args->nsample*args->nsex; i++) args->sex2prob[i] = 1;

    if ( args->verbose && args->guess )
        printf("# [1]REG\t[2]Region\t[3]Sample\t[4]Het fraction\t[5]nHet\t[6]nHom\t[7]nMissing\n");

    // First get the counts from expected haploid regions
    regidx_t *idx = ploidy_regions(args->ploidy);
    char **seqs = regidx_seq_names(idx, &nseq);
    for (i=0; i<nseq; i++)
    {
        regitr_t itr;
        regidx_overlap(idx, seqs[i], 0, UINT32_MAX, &itr);
        while ( itr.i < itr.n )
        {
            if ( args->guess )
                itr.i += process_region_guess(args, seqs[i], &itr);
            else
                itr.i += process_region_precise(args, seqs[i], &itr);
        }
    }
    // Get the counts from a PAR (the background diploid region) and see if the fraction
    // of hets is different
    if ( args->guess ) sex2prob_guess(args);

    for (i=0; i<args->nsample; i++)
    {
        int j, jmax = 0;
        float max = 0, sum = 0;
        for (j=0; j<args->nsex; j++)
        {
            sum += args->sex2prob[i*args->nsex+j];
            if ( max < args->sex2prob[i*args->nsex+j] )
            {
                jmax = j;
                max = args->sex2prob[i*args->nsex+j];
            }
        }
        if ( args->verbose )
            printf("%s\t%s\t%f\n", args->hdr->samples[i],ploidy_id2sex(args->ploidy,jmax),args->sex2prob[i*args->nsex+jmax]/sum);
        else
            printf("%s\t%s\n", args->hdr->samples[i],ploidy_id2sex(args->ploidy,jmax));
    }
   
    bcf_sr_destroy(args->sr);
    ploidy_destroy(args->ploidy);
    destroy_regs(args);
    free(args->sex2ploidy);
    free(args->counts);
    free(args->bg_counts);
    free(args->gts);
    free(args->pls);
    free(args->sex2prob);
    free(args);
    return 0;
}
Exemplo n.º 2
0
static void init_data(args_t *args)
{
    args->aux.srs = bcf_sr_init();

    // Open files for input and output, initialize structures
    if ( args->targets )
    {
        if ( bcf_sr_set_targets(args->aux.srs, args->targets, args->targets_is_file, args->aux.flag&CALL_CONSTR_ALLELES ? 3 : 0)<0 )
            error("Failed to read the targets: %s\n", args->targets);

        if ( args->aux.flag&CALL_CONSTR_ALLELES && args->flag&CF_INS_MISSED )
        {
            args->aux.srs->targets->missed_reg_handler = print_missed_line;
            args->aux.srs->targets->missed_reg_data = args;
        }
    }
    if ( args->regions )
    {
        if ( bcf_sr_set_regions(args->aux.srs, args->regions, args->regions_is_file)<0 )
            error("Failed to read the targets: %s\n", args->regions);
    }

    if ( !bcf_sr_add_reader(args->aux.srs, args->bcf_fname) ) error("Failed to open %s: %s\n", args->bcf_fname,bcf_sr_strerror(args->aux.srs->errnum));
    args->aux.hdr = bcf_sr_get_header(args->aux.srs,0);

    int i;
    if ( args->samples_fname )
    {
        set_samples(args, args->samples_fname, args->samples_is_file);
        if ( args->aux.flag&CALL_CONSTR_TRIO )
        {
            if ( 3*args->aux.nfams!=args->nsamples ) error("Expected only trios in %s, sorry!\n", args->samples_fname);
            fprintf(stderr,"Detected %d samples in %d trio families\n", args->nsamples,args->aux.nfams);
        }
        args->nsex = ploidy_nsex(args->ploidy);
        args->sex2ploidy = (int*) calloc(args->nsex,sizeof(int));
        args->sex2ploidy_prev = (int*) calloc(args->nsex,sizeof(int));
        args->aux.ploidy = (uint8_t*) malloc(args->nsamples);
        for (i=0; i<args->nsamples; i++) args->aux.ploidy[i] = 2;
        for (i=0; i<args->nsex; i++) args->sex2ploidy_prev[i] = 2;
    }

    if ( args->samples_map )
    {
        args->aux.hdr = bcf_hdr_subset(bcf_sr_get_header(args->aux.srs,0), args->nsamples, args->samples, args->samples_map);
        if ( !args->aux.hdr ) error("Error occurred while subsetting samples\n");
        for (i=0; i<args->nsamples; i++)
            if ( args->samples_map[i]<0 ) error("No such sample: %s\n", args->samples[i]);
        if ( !bcf_hdr_nsamples(args->aux.hdr) ) error("No matching sample found\n");
    }
    else
    {
        args->aux.hdr = bcf_hdr_dup(bcf_sr_get_header(args->aux.srs,0));
        for (i=0; i<args->nsamples; i++)
            if ( bcf_hdr_id2int(args->aux.hdr,BCF_DT_SAMPLE,args->samples[i])<0 )
                error("No such sample: %s\n", args->samples[i]);
    }

    args->out_fh = hts_open(args->output_fname, hts_bcf_wmode(args->output_type));
    if ( args->out_fh == NULL ) error("Can't write to \"%s\": %s\n", args->output_fname, strerror(errno));

    if ( args->flag & CF_QCALL )
        return;

    if ( args->flag & CF_MCALL )
        mcall_init(&args->aux);

    if ( args->flag & CF_CCALL )
        ccall_init(&args->aux);

    if ( args->flag&CF_GVCF )
    {
        bcf_hdr_append(args->aux.hdr,"##INFO=<ID=END,Number=1,Type=Integer,Description=\"End position of the variant described in this record\">");
        args->gvcf.rid  = -1;
        args->gvcf.line = bcf_init1();
        args->gvcf.gt   = (int32_t*) malloc(2*sizeof(int32_t)*bcf_hdr_nsamples(args->aux.hdr));
        for (i=0; i<bcf_hdr_nsamples(args->aux.hdr); i++)
        {
            args->gvcf.gt[2*i+0] = bcf_gt_unphased(0);
            args->gvcf.gt[2*i+1] = bcf_gt_unphased(0);
        }
    }

    bcf_hdr_remove(args->aux.hdr, BCF_HL_INFO, "QS");
    bcf_hdr_remove(args->aux.hdr, BCF_HL_INFO, "I16");

    bcf_hdr_append_version(args->aux.hdr, args->argc, args->argv, "bcftools_call");
    bcf_hdr_write(args->out_fh, args->aux.hdr);

    if ( args->flag&CF_INS_MISSED ) init_missed_line(args);
}