void bcf_sr_regions_destroy(bcf_sr_regions_t *reg) { int i; free(reg->fname); if ( reg->itr ) tbx_itr_destroy(reg->itr); if ( reg->tbx ) tbx_destroy(reg->tbx); if ( reg->file ) hts_close(reg->file); if ( reg->als ) free(reg->als); if ( reg->als_str.s ) free(reg->als_str.s); free(reg->line.s); if ( reg->regs ) { // free only in-memory names, tbx names are const for (i=0; i<reg->nseqs; i++) { free(reg->seq_names[i]); free(reg->regs[i].regs); } } free(reg->regs); free(reg->seq_names); khash_str2int_destroy(reg->seq_hash); free(reg); }
int bcf_sr_set_samples(bcf_srs_t *files, const char *fname, int is_file) { int i, j, nsmpl, free_smpl = 0; char **smpl = NULL; void *exclude = (fname[0]=='^') ? khash_str2int_init() : NULL; if ( exclude || strcmp("-",fname) ) // "-" stands for all samples { smpl = hts_readlist(fname, is_file, &nsmpl); if ( !smpl ) { fprintf(stderr,"Could not read the file: \"%s\"\n", fname); return 0; } if ( exclude ) { for (i=0; i<nsmpl; i++) khash_str2int_inc(exclude, smpl[i]); } free_smpl = 1; } if ( !smpl ) { smpl = files->readers[0].header->samples; // intersection of all samples nsmpl = bcf_hdr_nsamples(files->readers[0].header); } files->samples = NULL; files->n_smpl = 0; for (i=0; i<nsmpl; i++) { if ( exclude && khash_str2int_has_key(exclude,smpl[i]) ) continue; int n_isec = 0; for (j=0; j<files->nreaders; j++) { if ( bcf_hdr_id2int(files->readers[j].header, BCF_DT_SAMPLE, smpl[i])<0 ) break; n_isec++; } if ( n_isec!=files->nreaders ) { fprintf(stderr,"Warning: The sample \"%s\" was not found in %s, skipping\n", smpl[i], files->readers[n_isec].fname); continue; } files->samples = (char**) realloc(files->samples, (files->n_smpl+1)*sizeof(const char*)); files->samples[files->n_smpl++] = strdup(smpl[i]); } if ( exclude ) khash_str2int_destroy(exclude); if ( free_smpl ) { for (i=0; i<nsmpl; i++) free(smpl[i]); free(smpl); } if ( !files->n_smpl ) { if ( files->nreaders>1 ) fprintf(stderr,"No samples in common.\n"); return 0; } for (i=0; i<files->nreaders; i++) { bcf_sr_t *reader = &files->readers[i]; reader->samples = (int*) malloc(sizeof(int)*files->n_smpl); reader->n_smpl = files->n_smpl; for (j=0; j<files->n_smpl; j++) reader->samples[j] = bcf_hdr_id2int(reader->header, BCF_DT_SAMPLE, files->samples[j]); } return 1; }
static void init_data(args_t *args) { int i; args->hdr = args->files->readers[0].header; if (args->calc_ac && args->update_info) { bcf_hdr_append(args->hdr,"##INFO=<ID=AC,Number=A,Type=Integer,Description=\"Allele count in genotypes\">"); bcf_hdr_append(args->hdr,"##INFO=<ID=AN,Number=1,Type=Integer,Description=\"Total number of alleles in called genotypes\">"); } bcf_hdr_append_version(args->hdr, args->argc, args->argv, "bcftools_view"); // setup sample data if (args->sample_names) { void *hdr_samples = khash_str2int_init(); for (i=0; i<bcf_hdr_nsamples(args->hdr); i++) khash_str2int_inc(hdr_samples, bcf_hdr_int2id(args->hdr,BCF_DT_SAMPLE,i)); void *exclude = (args->sample_names[0]=='^') ? khash_str2int_init() : NULL; int nsmpl; char **smpl = NULL; args->samples = NULL; args->n_samples = 0; smpl = hts_readlist(exclude ? &args->sample_names[1] : args->sample_names, args->sample_is_file, &nsmpl); if ( !smpl ) { error("Could not read the list: \"%s\"\n", exclude ? &args->sample_names[1] : args->sample_names); } if ( exclude ) { for (i=0; i<nsmpl; i++) { if (!khash_str2int_has_key(hdr_samples,smpl[i])) { if (args->force_samples) { fprintf(stderr, "Warn: exclude called for sample that does not exist in header: \"%s\"... skipping\n", smpl[i]); } else { error("Error: exclude called for sample that does not exist in header: \"%s\". Use \"--force-samples\" to ignore this error.\n", smpl[i]); } } khash_str2int_inc(exclude, smpl[i]); } for (i=0; i<bcf_hdr_nsamples(args->hdr); i++) { if ( exclude && khash_str2int_has_key(exclude,bcf_hdr_int2id(args->hdr,BCF_DT_SAMPLE,i)) ) continue; args->samples = (char**) realloc(args->samples, (args->n_samples+1)*sizeof(const char*)); args->samples[args->n_samples++] = strdup(bcf_hdr_int2id(args->hdr,BCF_DT_SAMPLE,i)); } khash_str2int_destroy(exclude); } else { for (i=0; i<nsmpl; i++) { if (!khash_str2int_has_key(hdr_samples,smpl[i])) { if (args->force_samples) { fprintf(stderr, "Warn: subset called for sample that does not exist in header: \"%s\"... skipping\n", smpl[i]); continue; } else { error("Error: subset called for sample that does not exist in header: \"%s\". Use \"--force-samples\" to ignore this error.\n", smpl[i]); } } args->samples = (char**) realloc(args->samples, (args->n_samples+1)*sizeof(const char*)); args->samples[args->n_samples++] = strdup(smpl[i]); } } for (i=0; i<nsmpl; i++) free(smpl[i]); free(smpl); khash_str2int_destroy(hdr_samples); if (args->n_samples == 0) { fprintf(stderr, "Warn: subsetting has removed all samples\n"); args->sites_only = 1; } } if (args->n_samples) args->imap = (int*)malloc(args->n_samples * sizeof(int)); // determine variant types to include/exclude if (args->include_types || args->exclude_types) { if (args->include_types && args->exclude_types) { fprintf(stderr, "Error: only supply one of --include-types, --exclude-types options\n"); exit(1); } char **type_list = 0; int m = 0, n = 0; const char *q, *p; for (q = p = args->include_types ? args->include_types : args->exclude_types;; ++p) { if (*p == ',' || *p == 0) { if (m == n) { m = m? m<<1 : 16; type_list = (char**)realloc(type_list, m * sizeof(char*)); } type_list[n] = (char*)calloc(p - q + 1, 1); strncpy(type_list[n++], q, p - q); q = p + 1; if (*p == 0) break; } } type_list = (char**)realloc(type_list, n * sizeof(char*)); if (args->include_types) { args->include = 0; for (i = 0; i < n; ++i) { if (strcmp(type_list[i], "snps") == 0) args->include |= VCF_SNP; else if (strcmp(type_list[i], "indels") == 0) args->include |= VCF_INDEL; else if (strcmp(type_list[i], "mnps") == 0) args->include |= VCF_MNP; else if (strcmp(type_list[i], "other") == 0) args->include |= VCF_OTHER; else { fprintf(stderr, "[E::%s] unknown type\n", type_list[i]); fprintf(stderr, "Accepted types are snps, indels, mnps, other\n"); exit(1); } } } if (args->exclude_types) { args->exclude = 0; for (i = 0; i < n; ++i) { if (strcmp(type_list[i], "snps") == 0) args->exclude |= VCF_SNP; else if (strcmp(type_list[i], "indels") == 0) args->exclude |= VCF_INDEL; else if (strcmp(type_list[i], "mnps") == 0) args->exclude |= VCF_MNP; else if (strcmp(type_list[i], "other") == 0) args->exclude |= VCF_OTHER; else { fprintf(stderr, "[E::%s] unknown type\n", type_list[i]); fprintf(stderr, "Accepted types are snps, indels, mnps, other\n"); exit(1); } } } for (i = 0; i < n; ++i) free(type_list[i]); free(type_list); } // setup output char modew[8]; strcpy(modew, "w"); if (args->clevel >= 0 && args->clevel <= 9) sprintf(modew + 1, "%d", args->clevel); if (args->output_type==FT_BCF) strcat(modew, "bu"); // uncompressed BCF else if (args->output_type & FT_BCF) strcat(modew, "b"); // compressed BCF else if (args->output_type & FT_GZ) strcat(modew,"z"); // compressed VCF args->out = hts_open(args->fn_out ? args->fn_out : "-", modew); if ( !args->out ) error("%s: %s\n", args->fn_out,strerror(errno)); // headers: hdr=full header, hsub=subset header, hnull=sites only header if (args->sites_only){ args->hnull = bcf_hdr_subset(args->hdr, 0, 0, 0); bcf_hdr_remove(args->hnull, BCF_HL_FMT, NULL); } if (args->n_samples > 0) { args->hsub = bcf_hdr_subset(args->hdr, args->n_samples, args->samples, args->imap); if ( !args->hsub ) error("Error occurred while subsetting samples\n"); if ( args->n_samples != bcf_hdr_nsamples(args->hsub) ) { int i; for (i=0; i<args->n_samples; i++) if ( args->imap[i]<0 ) error("Error: No such sample: \"%s\"\n", args->samples[i]); } } if ( args->filter_str ) args->filter = filter_init(args->hdr, args->filter_str); }