int ploidy_parse(const char *line, char **chr_beg, char **chr_end, reg_t *reg, void *payload, void *usr) { int i, ret; ploidy_t *ploidy = (ploidy_t*) usr; void *sex2id = ploidy->sex2id; // Check for special case of default ploidy "* * * <sex> <ploidy>" int default_ploidy_def = 0; char *ss = (char*) line; while ( *ss && isspace(*ss) ) ss++; if ( ss[0]=='*' && (!ss[1] || isspace(ss[1])) ) default_ploidy_def = 1; // definition of default ploidy, chr="*" else { // Fill CHR,FROM,TO ret = regidx_parse_tab(line,chr_beg,chr_end,reg,NULL,NULL); if ( ret!=0 ) return ret; } // Skip the fields already parsed by regidx_parse_tab ss = (char*) line; while ( *ss && isspace(*ss) ) ss++; for (i=0; i<3; i++) { while ( *ss && !isspace(*ss) ) ss++; if ( !*ss ) return -2; // wrong number of fields while ( *ss && isspace(*ss) ) ss++; } if ( !*ss ) return -2; // Parse the payload char *se = ss; while ( *se && !isspace(*se) ) se++; if ( !*se || se==ss ) error("Could not parse: %s\n", line); ploidy->tmp_str.l = 0; kputsn(ss,se-ss,&ploidy->tmp_str); sex_ploidy_t *sp = (sex_ploidy_t*) payload; if ( khash_str2int_get(sex2id, ploidy->tmp_str.s, &sp->sex) != 0 ) { ploidy->nsex++; hts_expand0(char*,ploidy->nsex,ploidy->msex,ploidy->id2sex); ploidy->id2sex[ploidy->nsex-1] = strdup(ploidy->tmp_str.s); sp->sex = khash_str2int_inc(ploidy->sex2id, ploidy->id2sex[ploidy->nsex-1]); ploidy->sex2dflt = (int*) realloc(ploidy->sex2dflt,sizeof(int)*ploidy->nsex); ploidy->sex2dflt[ploidy->nsex-1] = ploidy->dflt; }
inline int regidx_push(regidx_t *idx, char *chr_beg, char *chr_end, uint32_t beg, uint32_t end, void *payload) { if ( beg > MAX_COOR_0 ) beg = MAX_COOR_0; if ( end > MAX_COOR_0 ) end = MAX_COOR_0; int rid; idx->str.l = 0; kputsn(chr_beg, chr_end-chr_beg+1, &idx->str); if ( khash_str2int_get(idx->seq2regs, idx->str.s, &rid)!=0 ) { // new chromosome idx->nseq++; int m_prev = idx->mseq; hts_expand0(reglist_t,idx->nseq,idx->mseq,idx->seq); hts_expand0(char*,idx->nseq,m_prev,idx->seq_names); idx->seq_names[idx->nseq-1] = strdup(idx->str.s); rid = khash_str2int_inc(idx->seq2regs, idx->seq_names[idx->nseq-1]); }
int regidx_insert(regidx_t *idx, char *line) { if ( !line ) return _regidx_build_index(idx); char *chr_from, *chr_to; reg_t reg; int ret = idx->parse(line,&chr_from,&chr_to,®,idx->payload,idx->usr); if ( ret==-2 ) return -1; // error if ( ret==-1 ) return 0; // skip the line int rid; idx->str.l = 0; kputsn(chr_from, chr_to-chr_from+1, &idx->str); if ( khash_str2int_get(idx->seq2regs, idx->str.s, &rid)!=0 ) { idx->nseq++; int m_prev = idx->mseq; hts_expand0(reglist_t,idx->nseq,idx->mseq,idx->seq); hts_expand0(char*,idx->nseq,m_prev,idx->seq_names); idx->seq_names[idx->nseq-1] = strdup(idx->str.s); rid = khash_str2int_inc(idx->seq2regs, idx->seq_names[idx->nseq-1]); }
/*
 *  Print the sample names found in the header of the first reader, one per
 *  line, to stdout. When args->sample_list is set, only samples named in that
 *  list are printed; the list is read with hts_readlist() and is treated as a
 *  file name or as an in-line list depending on args->sample_is_file.
 *
 *  Exits with status 1 if the sample list cannot be read.
 */
static void list_columns(args_t *args)
{
    void *has_sample = NULL;
    if ( args->sample_list )
    {
        int i, nsmpl = 0;
        char **smpl = hts_readlist(args->sample_list, args->sample_is_file, &nsmpl);
        if ( !smpl )
        {
            // Without this check the loop below would run on a NULL array
            // with a count that was never set
            fprintf(stderr, "Could not read the sample list: \"%s\"\n", args->sample_list);
            exit(1);
        }

        has_sample = khash_str2int_init();
        for (i=0; i<nsmpl; i++)
        {
            // The strings stay referenced as hash keys and are released by
            // khash_str2int_destroy_free() at the end; a repeated name is not
            // stored a second time, so it has to be freed here.
            if ( khash_str2int_has_key(has_sample, smpl[i]) )
                free(smpl[i]);
            else
                khash_str2int_inc(has_sample, smpl[i]);
        }
        free(smpl);     // only the array itself; the names are now hash keys
    }

    int i;
    bcf_sr_t *reader = &args->files->readers[0];
    for (i=0; i<bcf_hdr_nsamples(reader->header); i++)
    {
        if ( has_sample && !khash_str2int_has_key(has_sample, reader->header->samples[i]) ) continue;
        printf("%s\n", reader->header->samples[i]);
    }

    if ( has_sample )
        khash_str2int_destroy_free(has_sample);
}
/*
 *  Register the read group rg_id of the given file under the sample smpl_name.
 *
 *  - smpl_name==NULL records the read group with sample index -1.
 *  - A sample name not yet present in bsmpl->name2idx is appended to
 *    bsmpl->smpl and given a new index.
 *  - The read group id "*" does not create a hash entry; it sets the file's
 *    default sample index instead.
 *  - A read group id that is already present in file->rg2idx is left untouched.
 */
static void bsmpl_add_readgroup(bam_smpl_t *bsmpl, file_t *file, const char *rg_id, const char *smpl_name)
{
    int sample_idx = -1;

    if ( smpl_name && khash_str2int_get(bsmpl->name2idx, smpl_name, &sample_idx) < 0 )
    {
        // First time this sample name is seen: append it and index it
        int slot = bsmpl->nsmpl++;
        bsmpl->smpl = (char**) realloc(bsmpl->smpl, sizeof(char*)*bsmpl->nsmpl);
        bsmpl->smpl[slot] = strdup(smpl_name);
        sample_idx = khash_str2int_inc(bsmpl->name2idx, bsmpl->smpl[slot]);
    }

    if ( strcmp("*", rg_id) == 0 )
    {
        // Every read group of this file maps to the same sample
        file->default_idx = sample_idx;
        return;
    }

    if ( file->rg2idx == NULL )
        file->rg2idx = khash_str2int_init();

    // Keep the first definition when the same @RG:ID shows up again
    if ( !khash_str2int_has_key(file->rg2idx, rg_id) )
        khash_str2int_set(file->rg2idx, strdup(rg_id), sample_idx);
}
/*
 *  Select the samples the synced reader should work with.
 *
 *  fname    .. "-" selects all samples of the first reader's header;
 *              otherwise a list of sample names (or a file with names, see
 *              is_file) to include. A leading "^" turns the list into an
 *              exclusion list: all samples of the first reader's header
 *              except the listed ones are considered.
 *  is_file  .. passed through to hts_readlist()
 *
 *  Only samples present in the headers of all readers are kept; for the
 *  others a warning is printed. On success files->samples/files->n_smpl are
 *  filled and each reader gets a samples[] array with the per-header sample
 *  indexes.
 *
 *  Returns 1 on success, 0 if the list could not be read or no sample is left.
 */
int bcf_sr_set_samples(bcf_srs_t *files, const char *fname, int is_file)
{
    int i, j;
    int nsmpl = 0, nlist = 0;
    char **list = NULL;     // names read from fname; owned by this function
    char **smpl = NULL;     // candidate names: either list or the header's samples
    void *exclude = (fname[0]=='^') ? khash_str2int_init() : NULL;

    if ( exclude || strcmp("-",fname) )     // "-" stands for all samples
    {
        // The "^" prefix is only a marker and is not part of the list itself
        const char *list_src = exclude ? fname+1 : fname;
        list = hts_readlist(list_src, is_file, &nlist);
        if ( !list )
        {
            fprintf(stderr,"Could not read the file: \"%s\"\n", list_src);
            if ( exclude ) khash_str2int_destroy(exclude);
            return 0;
        }
    }

    if ( exclude )
    {
        // The hash only borrows the strings; they are freed with the list below
        for (i=0; i<nlist; i++)
            khash_str2int_inc(exclude, list[i]);
    }

    if ( list && !exclude )
    {
        smpl  = list;
        nsmpl = nlist;
    }
    else
    {
        // All samples, or all samples minus the excluded ones: the candidates
        // come from the first header. Iterating over the exclusion list itself
        // would reject every candidate.
        smpl  = files->readers[0].header->samples;
        nsmpl = bcf_hdr_nsamples(files->readers[0].header);
    }

    files->samples = NULL;
    files->n_smpl  = 0;
    for (i=0; i<nsmpl; i++)
    {
        if ( exclude && khash_str2int_has_key(exclude,smpl[i]) ) continue;

        // Count the readers, in order, which know this sample
        int n_isec = 0;
        for (j=0; j<files->nreaders; j++)
        {
            if ( bcf_hdr_id2int(files->readers[j].header, BCF_DT_SAMPLE, smpl[i])<0 ) break;
            n_isec++;
        }
        if ( n_isec!=files->nreaders )
        {
            // n_isec is the index of the first reader lacking the sample
            fprintf(stderr,"Warning: The sample \"%s\" was not found in %s, skipping\n", smpl[i], files->readers[n_isec].fname);
            continue;
        }
        files->samples = (char**) realloc(files->samples, (files->n_smpl+1)*sizeof(const char*));
        files->samples[files->n_smpl++] = strdup(smpl[i]);
    }

    if ( exclude ) khash_str2int_destroy(exclude);
    if ( list )
    {
        for (i=0; i<nlist; i++) free(list[i]);
        free(list);
    }

    if ( !files->n_smpl )
    {
        if ( files->nreaders>1 )
            fprintf(stderr,"No samples in common.\n");
        return 0;
    }

    // Translate the selected names to per-reader sample indexes
    for (i=0; i<files->nreaders; i++)
    {
        bcf_sr_t *reader = &files->readers[i];
        reader->samples = (int*) malloc(sizeof(int)*files->n_smpl);
        reader->n_smpl  = files->n_smpl;
        for (j=0; j<files->n_smpl; j++)
            reader->samples[j] = bcf_hdr_id2int(reader->header, BCF_DT_SAMPLE, files->samples[j]);
    }
    return 1;
}
int bam_mpileup(int argc, char *argv[]) { int c; const char *file_list = NULL; char **fn = NULL; int nfiles = 0, use_orphan = 0; mplp_conf_t mplp; memset(&mplp, 0, sizeof(mplp_conf_t)); mplp.min_baseQ = 13; mplp.capQ_thres = 0; mplp.max_depth = 250; mplp.max_indel_depth = 250; mplp.openQ = 40; mplp.extQ = 20; mplp.tandemQ = 100; mplp.min_frac = 0.002; mplp.min_support = 1; mplp.flag = MPLP_NO_ORPHAN | MPLP_REALN | MPLP_SMART_OVERLAPS; mplp.argc = argc; mplp.argv = argv; mplp.rflag_filter = BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP; mplp.output_fname = NULL; static const struct option lopts[] = { {"rf", required_argument, NULL, 1}, // require flag {"ff", required_argument, NULL, 2}, // filter flag {"incl-flags", required_argument, NULL, 1}, {"excl-flags", required_argument, NULL, 2}, {"output", required_argument, NULL, 3}, {"open-prob", required_argument, NULL, 4}, {"illumina1.3+", no_argument, NULL, '6'}, {"count-orphans", no_argument, NULL, 'A'}, {"bam-list", required_argument, NULL, 'b'}, {"no-BAQ", no_argument, NULL, 'B'}, {"no-baq", no_argument, NULL, 'B'}, {"adjust-MQ", required_argument, NULL, 'C'}, {"adjust-mq", required_argument, NULL, 'C'}, {"max-depth", required_argument, NULL, 'd'}, {"redo-BAQ", no_argument, NULL, 'E'}, {"redo-baq", no_argument, NULL, 'E'}, {"fasta-ref", required_argument, NULL, 'f'}, {"exclude-RG", required_argument, NULL, 'G'}, {"exclude-rg", required_argument, NULL, 'G'}, {"positions", required_argument, NULL, 'l'}, {"region", required_argument, NULL, 'r'}, {"ignore-RG", no_argument, NULL, 'R'}, {"ignore-rg", no_argument, NULL, 'R'}, {"min-MQ", required_argument, NULL, 'q'}, {"min-mq", required_argument, NULL, 'q'}, {"min-BQ", required_argument, NULL, 'Q'}, {"min-bq", required_argument, NULL, 'Q'}, {"ignore-overlaps", no_argument, NULL, 'x'}, {"BCF", no_argument, NULL, 'g'}, {"bcf", no_argument, NULL, 'g'}, {"VCF", no_argument, NULL, 'v'}, {"vcf", no_argument, NULL, 'v'}, {"output-BP", no_argument, NULL, 'O'}, {"output-bp", 
no_argument, NULL, 'O'}, {"output-MQ", no_argument, NULL, 's'}, {"output-mq", no_argument, NULL, 's'}, {"output-tags", required_argument, NULL, 't'}, {"uncompressed", no_argument, NULL, 'u'}, {"ext-prob", required_argument, NULL, 'e'}, {"gap-frac", required_argument, NULL, 'F'}, {"tandem-qual", required_argument, NULL, 'h'}, {"skip-indels", no_argument, NULL, 'I'}, {"max-idepth", required_argument, NULL, 'L'}, {"min-ireads ", required_argument, NULL, 'm'}, {"per-sample-mF", no_argument, NULL, 'p'}, {"per-sample-mf", no_argument, NULL, 'p'}, {"platforms", required_argument, NULL, 'P'}, {NULL, 0, NULL, 0} }; while ((c = getopt_long(argc, argv, "Agf:r:l:q:Q:uRC:BDSd:L:b:P:po:e:h:Im:F:EG:6OsVvxt:",lopts,NULL)) >= 0) { switch (c) { case 'x': mplp.flag &= ~MPLP_SMART_OVERLAPS; break; case 1 : mplp.rflag_require = bam_str2flag(optarg); if ( mplp.rflag_require<0 ) { fprintf(stderr,"Could not parse --rf %s\n", optarg); return 1; } break; case 2 : mplp.rflag_filter = bam_str2flag(optarg); if ( mplp.rflag_filter<0 ) { fprintf(stderr,"Could not parse --ff %s\n", optarg); return 1; } break; case 3 : mplp.output_fname = optarg; break; case 4 : mplp.openQ = atoi(optarg); break; case 'f': mplp.fai = fai_load(optarg); if (mplp.fai == 0) return 1; mplp.fai_fname = optarg; break; case 'd': mplp.max_depth = atoi(optarg); break; case 'r': mplp.reg = strdup(optarg); break; case 'l': // In the original version the whole BAM was streamed which is inefficient // with few BED intervals and big BAMs. Todo: devise a heuristic to determine // best strategy, that is streaming or jumping. 
mplp.bed = bed_read(optarg); if (!mplp.bed) { print_error_errno("Could not read file \"%s\"", optarg); return 1; } break; case 'P': mplp.pl_list = strdup(optarg); break; case 'p': mplp.flag |= MPLP_PER_SAMPLE; break; case 'g': mplp.flag |= MPLP_BCF; break; case 'v': mplp.flag |= MPLP_BCF | MPLP_VCF; break; case 'u': mplp.flag |= MPLP_NO_COMP | MPLP_BCF; break; case 'B': mplp.flag &= ~MPLP_REALN; break; case 'D': mplp.fmt_flag |= B2B_FMT_DP; fprintf(stderr, "[warning] samtools mpileup option `-D` is functional, but deprecated. Please switch to `-t DP` in future.\n"); break; case 'S': mplp.fmt_flag |= B2B_FMT_SP; fprintf(stderr, "[warning] samtools mpileup option `-S` is functional, but deprecated. Please switch to `-t SP` in future.\n"); break; case 'V': mplp.fmt_flag |= B2B_FMT_DV; fprintf(stderr, "[warning] samtools mpileup option `-V` is functional, but deprecated. Please switch to `-t DV` in future.\n"); break; case 'I': mplp.flag |= MPLP_NO_INDEL; break; case 'E': mplp.flag |= MPLP_REDO_BAQ; break; case '6': mplp.flag |= MPLP_ILLUMINA13; break; case 'R': mplp.flag |= MPLP_IGNORE_RG; break; case 's': mplp.flag |= MPLP_PRINT_MAPQ; break; case 'O': mplp.flag |= MPLP_PRINT_POS; break; case 'C': mplp.capQ_thres = atoi(optarg); break; case 'q': mplp.min_mq = atoi(optarg); break; case 'Q': mplp.min_baseQ = atoi(optarg); break; case 'b': file_list = optarg; break; case 'o': { char *end; long value = strtol(optarg, &end, 10); // Distinguish between -o INT and -o FILE (a bit of a hack!) 
if (*end == '\0') mplp.openQ = value; else mplp.output_fname = optarg; } break; case 'e': mplp.extQ = atoi(optarg); break; case 'h': mplp.tandemQ = atoi(optarg); break; case 'A': use_orphan = 1; break; case 'F': mplp.min_frac = atof(optarg); break; case 'm': mplp.min_support = atoi(optarg); break; case 'L': mplp.max_indel_depth = atoi(optarg); break; case 'G': { FILE *fp_rg; char buf[1024]; mplp.rghash = khash_str2int_init(); if ((fp_rg = fopen(optarg, "r")) == 0) fprintf(stderr, "(%s) Fail to open file %s. Continue anyway.\n", __func__, optarg); while (!feof(fp_rg) && fscanf(fp_rg, "%s", buf) > 0) // this is not a good style, but forgive me... khash_str2int_inc(mplp.rghash, strdup(buf)); fclose(fp_rg); } break; case 't': mplp.fmt_flag |= parse_format_flag(optarg); break; default: fprintf(stderr,"Invalid option: '%c'\n", c); return 1; } } if ( !(mplp.flag&MPLP_REALN) && mplp.flag&MPLP_REDO_BAQ ) { fprintf(stderr,"Error: The -B option cannot be combined with -E\n"); return 1; } if (use_orphan) mplp.flag &= ~MPLP_NO_ORPHAN; if (argc == 1) { print_usage(stderr, &mplp); return 1; } int ret; if (file_list) { if ( read_file_list(file_list,&nfiles,&fn) ) return 1; ret = mpileup(&mplp,nfiles,fn); for (c=0; c<nfiles; c++) free(fn[c]); free(fn); } else ret = mpileup(&mplp, argc - optind, argv + optind); if (mplp.rghash) khash_str2int_destroy_free(mplp.rghash); free(mplp.reg); free(mplp.pl_list); if (mplp.fai) fai_destroy(mplp.fai); if (mplp.bed) bed_destroy(mplp.bed); return ret; }
/*
 *  Prepare everything needed before records are processed:
 *    - extend the header (AC/AN definitions when they will be recalculated,
 *      the program version line)
 *    - build the list of samples to keep from args->sample_names; a leading
 *      "^" means "all header samples except the listed ones"
 *    - translate --include-types/--exclude-types into VCF_* bit masks
 *    - open the output file with the mode matching output type and
 *      compression level
 *    - create the subset / sites-only headers and initialise the filter
 *
 *  Unrecoverable problems are reported via error() or exit(1).
 */
static void init_data(args_t *args)
{
    int i;
    args->hdr = args->files->readers[0].header;

    if (args->calc_ac && args->update_info)
    {
        bcf_hdr_append(args->hdr,"##INFO=<ID=AC,Number=A,Type=Integer,Description=\"Allele count in genotypes\">");
        bcf_hdr_append(args->hdr,"##INFO=<ID=AN,Number=1,Type=Integer,Description=\"Total number of alleles in called genotypes\">");
    }
    bcf_hdr_append_version(args->hdr, args->argc, args->argv, "bcftools_view");

    // setup sample data
    if (args->sample_names)
    {
        // Lookup of the samples present in the header; the keys are borrowed
        // from the header, hence the plain khash_str2int_destroy() below
        void *hdr_samples = khash_str2int_init();
        for (i=0; i<bcf_hdr_nsamples(args->hdr); i++)
            khash_str2int_inc(hdr_samples, bcf_hdr_int2id(args->hdr,BCF_DT_SAMPLE,i));

        void *exclude = (args->sample_names[0]=='^') ? khash_str2int_init() : NULL;
        const char *list_src = exclude ? &args->sample_names[1] : args->sample_names;
        int nsmpl;
        char **smpl = NULL;
        args->samples = NULL;
        args->n_samples = 0;
        smpl = hts_readlist(list_src, args->sample_is_file, &nsmpl);
        if ( !smpl )
        {
            error("Could not read the list: \"%s\"\n", list_src);
        }

        if ( exclude )
        {
            for (i=0; i<nsmpl; i++)
            {
                if (!khash_str2int_has_key(hdr_samples,smpl[i]))
                {
                    if (args->force_samples)
                    {
                        fprintf(stderr, "Warn: exclude called for sample that does not exist in header: \"%s\"... skipping\n", smpl[i]);
                    }
                    else
                    {
                        error("Error: exclude called for sample that does not exist in header: \"%s\". Use \"--force-samples\" to ignore this error.\n", smpl[i]);
                    }
                }
                khash_str2int_inc(exclude, smpl[i]);
            }
            // Keep every header sample which was not excluded
            for (i=0; i<bcf_hdr_nsamples(args->hdr); i++)
            {
                if ( khash_str2int_has_key(exclude,bcf_hdr_int2id(args->hdr,BCF_DT_SAMPLE,i)) ) continue;
                args->samples = (char**) realloc(args->samples, (args->n_samples+1)*sizeof(const char*));
                args->samples[args->n_samples++] = strdup(bcf_hdr_int2id(args->hdr,BCF_DT_SAMPLE,i));
            }
            khash_str2int_destroy(exclude);
        }
        else
        {
            // Keep the listed samples, in the order given
            for (i=0; i<nsmpl; i++)
            {
                if (!khash_str2int_has_key(hdr_samples,smpl[i]))
                {
                    if (args->force_samples)
                    {
                        fprintf(stderr, "Warn: subset called for sample that does not exist in header: \"%s\"... skipping\n", smpl[i]);
                        continue;
                    }
                    else
                    {
                        error("Error: subset called for sample that does not exist in header: \"%s\". Use \"--force-samples\" to ignore this error.\n", smpl[i]);
                    }
                }
                args->samples = (char**) realloc(args->samples, (args->n_samples+1)*sizeof(const char*));
                args->samples[args->n_samples++] = strdup(smpl[i]);
            }
        }
        for (i=0; i<nsmpl; i++) free(smpl[i]);
        free(smpl);
        khash_str2int_destroy(hdr_samples);

        if (args->n_samples == 0)
        {
            fprintf(stderr, "Warn: subsetting has removed all samples\n");
            args->sites_only = 1;
        }
    }

    if (args->n_samples)
        args->imap = (int*)malloc(args->n_samples * sizeof(int));

    // determine variant types to include/exclude
    if (args->include_types || args->exclude_types)
    {
        if (args->include_types && args->exclude_types)
        {
            fprintf(stderr, "Error: only supply one of --include-types, --exclude-types options\n");
            exit(1);
        }

        // Split the comma-separated list into separate strings
        char **type_list = 0;
        int m = 0, n = 0;
        const char *q, *p;
        for (q = p = args->include_types ? args->include_types : args->exclude_types;; ++p)
        {
            if (*p == ',' || *p == 0)
            {
                if (m == n)
                {
                    m = m ? m<<1 : 16;
                    type_list = (char**)realloc(type_list, m * sizeof(char*));
                }
                // calloc zero-fills, so the copy is always terminated
                type_list[n] = (char*)calloc(p - q + 1, 1);
                memcpy(type_list[n++], q, p - q);
                q = p + 1;
                if (*p == 0) break;
            }
        }

        // Exactly one of the two options is set at this point, so the mask is
        // built once and stored in the matching field
        int type_mask = 0;
        for (i = 0; i < n; ++i)
        {
            if (strcmp(type_list[i], "snps") == 0) type_mask |= VCF_SNP;
            else if (strcmp(type_list[i], "indels") == 0) type_mask |= VCF_INDEL;
            else if (strcmp(type_list[i], "mnps") == 0) type_mask |= VCF_MNP;
            else if (strcmp(type_list[i], "other") == 0) type_mask |= VCF_OTHER;
            else
            {
                fprintf(stderr, "[E::%s] unknown type\n", type_list[i]);
                fprintf(stderr, "Accepted types are snps, indels, mnps, other\n");
                exit(1);
            }
        }
        if (args->include_types) args->include = type_mask;
        else args->exclude = type_mask;

        for (i = 0; i < n; ++i)
            free(type_list[i]);
        free(type_list);
    }

    // setup output: "w", optionally followed by the compression level and the format letter(s)
    char modew[8];
    strcpy(modew, "w");
    if (args->clevel >= 0 && args->clevel <= 9) sprintf(modew + 1, "%d", args->clevel);
    if (args->output_type==FT_BCF) strcat(modew, "bu");         // uncompressed BCF
    else if (args->output_type & FT_BCF) strcat(modew, "b");    // compressed BCF
    else if (args->output_type & FT_GZ) strcat(modew,"z");      // compressed VCF

    // fn_out is NULL when writing to standard output; use the same name in
    // the error message so that NULL is never passed to "%s"
    const char *out_name = args->fn_out ? args->fn_out : "-";
    args->out = hts_open(out_name, modew);
    if ( !args->out ) error("%s: %s\n", out_name, strerror(errno));

    // headers: hdr=full header, hsub=subset header, hnull=sites only header
    if (args->sites_only)
    {
        args->hnull = bcf_hdr_subset(args->hdr, 0, 0, 0);
        bcf_hdr_remove(args->hnull, BCF_HL_FMT, NULL);
    }
    if (args->n_samples > 0)
    {
        args->hsub = bcf_hdr_subset(args->hdr, args->n_samples, args->samples, args->imap);
        if ( !args->hsub ) error("Error occurred while subsetting samples\n");
        if ( args->n_samples != bcf_hdr_nsamples(args->hsub) )
        {
            // Report the first requested sample which has a negative entry in imap
            for (i=0; i<args->n_samples; i++)
                if ( args->imap[i]<0 ) error("Error: No such sample: \"%s\"\n", args->samples[i]);
        }
    }

    if ( args->filter_str )
        args->filter = filter_init(args->hdr, args->filter_str);
}
/*
 *  Register one BAM file and the read groups from its header text.
 *
 *  The function also has to cope with broken BAMs which contain read groups
 *  missing from the header. The intended behaviour:
 *    - with -G, read groups not listed in the header have to be requested
 *      explicitly via the "?" symbol in the -G list
 *    - without -G:
 *        - a BAM with no header has all its reads assigned to one sample
 *          named after the file
 *        - if the header defines at least one sample, reads without a read
 *          group id, or with an id not listed in the header, go to the first
 *          sample encountered in the header
 *
 *  Returns the index of the file in bsmpl->files, or -1 when no usable read
 *  group was found and the file is to be ignored.
 *
 *  Note that bam_hdr is modified temporarily while parsing (field ends are
 *  overwritten with NUL) and restored before the function returns.
 */
int bam_smpl_add_bam(bam_smpl_t *bsmpl, char *bam_hdr, const char *fname)
{
    // Append a zeroed slot for the new file
    bsmpl->nfiles++;
    bsmpl->files = (file_t*) realloc(bsmpl->files, bsmpl->nfiles*sizeof(file_t));
    const int file_idx = bsmpl->nfiles - 1;
    file_t *file = &bsmpl->files[file_idx];
    memset(file, 0, sizeof(file_t));
    file->fname = strdup(fname);
    file->default_idx = -1;

    if ( bsmpl->ignore_rg || !bam_hdr )
    {
        // --ignore-RG was given or the BAM has no header: the file name serves as the sample name
        bsmpl_add_readgroup(bsmpl, file, "*", file->fname);
        return file_idx;
    }

    void *seen_samples = khash_str2int_init();  // distinct sample names met in this header
    int first_smpl = -1, nskipped = 0;
    const char *cursor = bam_hdr;
    while ( cursor != NULL )
    {
        const char *tag = strstr(cursor, "@RG");
        if ( tag == NULL ) break;

        char *eol = strchr(tag + 3, '\n');
        if ( tag > bam_hdr && tag[-1] != '\n' )
        {
            // @RG counts only at the start of a line
            cursor = eol;
            continue;
        }
        cursor = tag + 3;

        const char *rg_id  = strstr(cursor, "\tID:");
        const char *sample = strstr(cursor, "\tSM:");
        if ( rg_id == NULL || sample == NULL ) break;
        rg_id  += 4;
        sample += 4;

        // Cut both values out of the header in place; the overwritten
        // characters are put back at the end of the iteration
        char *id_end = (char*) rg_id;
        while ( *id_end && *id_end != '\t' && *id_end != '\n' ) ++id_end;
        char *sm_end = (char*) sample;
        while ( *sm_end && *sm_end != '\t' && *sm_end != '\n' ) ++sm_end;
        int id_saved = *id_end;
        int sm_saved = *sm_end;
        *id_end = *sm_end = '\0';

        // From here on rg_id is a NUL-terminated read group id and sample
        // a NUL-terminated sample name
        if ( strcmp("*", rg_id) == 0 || strcmp("?", rg_id) == 0 )
            error("Error: the read group IDs \"*\" and \"?\" have a special meaning in the mpileup code. Please fix the code or the bam: %s\n", fname);

        int accept_rg = 1;
        if ( bsmpl->sample_list )
        {
            // restrict samples based on the -s/-S options
            char *name = khash_str2str_get(bsmpl->sample_list, sample);
            if ( bsmpl->sample_logic==0 ) accept_rg = (name == NULL);
            else if ( name != NULL ) sample = name;
            else accept_rg = 0;
        }
        if ( accept_rg && bsmpl->rg_list )
        {
            // restrict read groups based on the -G option, possibly renaming the sample
            accept_rg = bsmpl_keep_readgroup(bsmpl, file, rg_id, &sample);
        }

        if ( accept_rg )
            bsmpl_add_readgroup(bsmpl, file, rg_id, sample);
        else
        {
            // the RG is ignored, but remember that the header did list it
            bsmpl_add_readgroup(bsmpl, file, rg_id, NULL);
            nskipped++;
        }

        if ( first_smpl < 0 )
            khash_str2int_get(bsmpl->name2idx, sample, &first_smpl);
        if ( !khash_str2int_has_key(seen_samples, sample) )
            khash_str2int_inc(seen_samples, strdup(sample));

        *id_end = id_saved;
        *sm_end = sm_saved;

        cursor = eol;
    }
    int nsmpls = khash_str2int_size(seen_samples);
    khash_str2int_destroy_free(seen_samples);

    // Decide what happens to reads whose read group is not in the header
    const char *smpl_name = NULL;
    int accept_null_rg = 1;
    if ( bsmpl->rg_list && !bsmpl_keep_readgroup(bsmpl, file, "?", &smpl_name) ) accept_null_rg = 0;
    if ( bsmpl->sample_list && first_smpl==-1 ) accept_null_rg = 0;

    if ( !accept_null_rg )
    {
        if ( first_smpl != -1 ) return file_idx;

        // no suitable read group is available in this bam: ignore the whole file
        free(file->fname);
        if ( file->rg2idx ) khash_str2int_destroy_free(file->rg2idx);
        bsmpl->nfiles--;
        return -1;
    }

    if ( nsmpls==1 && !nskipped )
    {
        // one sample and nothing skipped: it becomes the default for all reads
        file->default_idx = first_smpl;
        return file_idx;
    }

    if ( smpl_name == NULL )
    {
        if ( first_smpl==-1 ) smpl_name = file->fname;
        else smpl_name = bsmpl->smpl[first_smpl];
    }
    bsmpl_add_readgroup(bsmpl, file, "?", smpl_name);
    return file_idx;
}