//read program parameters void setArgsBam(argStruct *arguments){ int seed=0; remove_bads = angsd::getArg("-remove_bads",remove_bads,arguments); uniqueOnly = angsd::getArg("-uniqueOnly",uniqueOnly,arguments); only_proper_pairs =angsd::getArg("-only_proper_pairs",only_proper_pairs,arguments); fai_fname =angsd::getArg("-f",fai_fname,arguments); minMapQ = angsd::getArg("-minMapQ",minMapQ,arguments); cigstat = angsd::getArg("-cigstat",cigstat,arguments); minQ = angsd::getArg("-minQ",minQ,arguments); downSample = angsd::getArg("-downSample",downSample,arguments); seed = angsd::getArg("-seed",seed,arguments); trim = angsd::getArg("-trim",trim,arguments); trim5 = angsd::getArg("-trim5",trim5,arguments); trim3 = angsd::getArg("-trim3",trim3,arguments); arguments->ref=angsd::getArg("-ref",arguments->ref,arguments); arguments->anc=angsd::getArg("-anc",arguments->anc,arguments); rghash_name= angsd::getArg("+RG",rghash_name,arguments); if(rghash_name&&!angsd::fexists(rghash_name)) rghash = add_read_group_single(rghash_name); if(rghash_name&&angsd::fexists(rghash_name)) rghash = add_read_groups_file(rghash_name); if(rghash) fprintf(stderr,"\t-> [READGROUP info] Number of readgroups to include: %d\n",khash_str2int_size(rghash)); adjustMapQ = angsd::getArg("-C",adjustMapQ,arguments); baq = angsd::getArg("-baq",baq,arguments); redo_baq = angsd::getArg("-redo-baq",redo_baq,arguments); if(baq){ if(baq==1) baq=1; //wauv else if(baq==2) baq=3; else{ fprintf(stderr,"\t-> only supported options for -baq is: 1 (normal baq) and 2 (extended baq (SAMtools default)). Value supplied:%d\n",baq); exit(0);//ly su } if(redo_baq==1) baq |=4; } // fprintf(stderr,"baq:%d redobaq:%d\n",baq,redo_baq);exit(0); regfile =angsd::getArg("-r",regfile,arguments); regfiles = angsd::getArg("-rf",regfiles,arguments); MAX_SEQ_LEN = angsd::getArg("-setMinChunkSize",MAX_SEQ_LEN,arguments); checkBamHeaders = angsd::getArg("-checkBamHeaders",checkBamHeaders,arguments); doCheck = angsd::getArg("-doCheck",doCheck,arguments); MPLP_IGNORE_RG = angsd::getArg("--ignore-RG",MPLP_IGNORE_RG,arguments); arguments->nReads = angsd::getArg("-nReads",arguments->nReads,arguments); arguments->show = angsd::getArg("-show",arguments->show,arguments); if(regfile && regfiles) fprintf(stderr,"\t-> WARNING both -r and -rf has been set \n"); if(seed) srand48(seed); char *tmp = NULL; tmp = angsd::getArg("-ref",tmp,arguments); if(tmp==NULL && adjustMapQ!=0){ fprintf(stderr,"\t-> Must also supply -ref for adjusting the mapping quality\n"); exit(0); } if(tmp==NULL&&baq!=0){ fprintf(stderr,"\t-> Must also supply -ref for adjusting base qualities (baq)\n"); exit(0); } free(tmp); std::vector<char *> regionsRaw; if(regfiles) regionsRaw = angsd::getFilenames(regfiles,0); if(regfile) regionsRaw.push_back(strdup(regfile)); // fprintf(stderr,"\t-> RegionsRaw.size():%lu hd:%p\n",regionsRaw.size(),arguments->hd); for(size_t i=0;i<regionsRaw.size();i++){ regs tmpRegs; if(parse_region(regionsRaw[i],arguments->hd,tmpRegs.refID,tmpRegs.start,tmpRegs.stop,arguments->revMap)<0||tmpRegs.stop<tmpRegs.start){ fprintf(stderr,"[%s] Problems with indexing: %s\n",__FUNCTION__,regionsRaw[i]); exit(0); }else arguments->regions.push_back(tmpRegs); free(regionsRaw[i]); } printArg(arguments->argumentFile,arguments); if(regfile) free(regfile); if(regfiles) free(regfiles); }
/* The logic of this function is a bit complicated because we want to work also with broken bams containing read groups that are not listed in the header. The desired behavior is as follows: - when -G is given, read groups which are not listed in the header must be given explicitly using the "?" symbol in -G. Otherwise: - if the bam has no header, all reads in the file are assigned to a single sample named after the file - if there is at least one sample defined in the header, reads with no read group id or with a read group id not listed in the header are assigned to the first sample encountered in the header */ int bam_smpl_add_bam(bam_smpl_t *bsmpl, char *bam_hdr, const char *fname) { bsmpl->nfiles++; bsmpl->files = (file_t*) realloc(bsmpl->files,bsmpl->nfiles*sizeof(file_t)); file_t *file = &bsmpl->files[bsmpl->nfiles-1]; memset(file,0,sizeof(file_t)); file->fname = strdup(fname); file->default_idx = -1; if ( bsmpl->ignore_rg || !bam_hdr ) { // The option --ignore-RG is set or there is no BAM header: use the file name as the sample name bsmpl_add_readgroup(bsmpl,file,"*",file->fname); return bsmpl->nfiles-1; } void *bam_smpls = khash_str2int_init(); int first_smpl = -1, nskipped = 0; const char *p = bam_hdr, *q, *r; while (p != NULL && (q = strstr(p, "@RG")) != 0) { char *eol = strchr(q + 3, '\n'); if (q > bam_hdr && *(q - 1) != '\n') { // @RG must be at start of line p = eol; continue; } p = q + 3; if ((q = strstr(p, "\tID:")) != 0) q += 4; if ((r = strstr(p, "\tSM:")) != 0) r += 4; if (r && q) { char *u, *v; int ioq, ior; for (u = (char*)q; *u && *u != '\t' && *u != '\n'; ++u); for (v = (char*)r; *v && *v != '\t' && *v != '\n'; ++v); ioq = *u; ior = *v; *u = *v = '\0'; // q now points to a null terminated read group id // r points to a null terminated sample name if ( !strcmp("*",q) || !strcmp("?",q) ) error("Error: the read group IDs \"*\" and \"?\" have a special meaning in the mpileup code. Please fix the code or the bam: %s\n", fname); int accept_rg = 1; if ( bsmpl->sample_list ) { // restrict samples based on the -s/-S options char *name = khash_str2str_get(bsmpl->sample_list,r); if ( bsmpl->sample_logic==0 ) accept_rg = name ? 0 : 1; else if ( !name ) accept_rg = 0; else r = name; } if ( accept_rg && bsmpl->rg_list ) { // restrict readgroups based on the -G option, possibly renaming the sample accept_rg = bsmpl_keep_readgroup(bsmpl,file,q,&r); } if ( accept_rg ) bsmpl_add_readgroup(bsmpl,file,q,r); else { bsmpl_add_readgroup(bsmpl,file,q,NULL); // ignore this RG but note that it was seen in the header nskipped++; } if ( first_smpl<0 ) khash_str2int_get(bsmpl->name2idx,r,&first_smpl); if ( !khash_str2int_has_key(bam_smpls,r) ) khash_str2int_inc(bam_smpls,strdup(r)); *u = ioq; *v = ior; } else break; p = eol; } int nsmpls = khash_str2int_size(bam_smpls); khash_str2int_destroy_free(bam_smpls); const char *smpl_name = NULL; int accept_null_rg = 1; if ( bsmpl->rg_list && !bsmpl_keep_readgroup(bsmpl,file,"?",&smpl_name) ) accept_null_rg = 0; if ( bsmpl->sample_list && first_smpl==-1 ) accept_null_rg = 0; if ( !accept_null_rg && first_smpl==-1 ) { // no suitable read group is available in this bam: ignore the whole file. free(file->fname); if ( file->rg2idx ) khash_str2int_destroy_free(file->rg2idx); bsmpl->nfiles--; return -1; } if ( !accept_null_rg ) return bsmpl->nfiles-1; if ( nsmpls==1 && !nskipped ) { file->default_idx = first_smpl; return bsmpl->nfiles-1; } if ( !smpl_name ) smpl_name = first_smpl==-1 ? file->fname : bsmpl->smpl[first_smpl]; bsmpl_add_readgroup(bsmpl,file,"?",smpl_name); return bsmpl->nfiles-1; }