/*
 * Return the combined version string "<BCFTOOLS_VERSION>+htslib-<hts_version()>".
 *
 * The string is built on the first call and cached in
 * bcftools_version_string; later calls return the cached pointer.  The
 * caller must not free the result.
 *
 * If the buffer cannot be allocated, BCFTOOLS_VERSION alone is returned and
 * nothing is cached, so callers that print the result with "%s" never see
 * a NULL pointer.
 */
char *bcftools_version(void)
{
    if ( !bcftools_version_string )
    {
        const char *hts = hts_version();
        /* 8 bytes for "+htslib-" plus one for the terminating NUL */
        size_t len = strlen(hts) + strlen(BCFTOOLS_VERSION) + 9;
        char *buf = malloc(len);
        if ( !buf ) return (char*) BCFTOOLS_VERSION;
        snprintf(buf, len, "%s+htslib-%s", BCFTOOLS_VERSION, hts);
        bcftools_version_string = buf;
    }
    return bcftools_version_string;
}
//print nice info void printProgInfo(FILE *fp){ fprintf(fp,"\n\t-> angsd version: %s (htslib: %s) build(%s %s)\n",ANGSD_VERSION,hts_version(),__DATE__,__TIME__); fprintf(fp,"\t-> Please use the website \"http://www.popgen.dk/angsd\" as reference\n"); fprintf(fp,"\t-> Use -nThreads or -P for number of threads allocated to the program\n"); fprintf(fp,"Overview of methods:\n"); fprintf(fp,"\t-GL\t\tEstimate genotype likelihoods\n"); fprintf(fp,"\t-doCounts\tCalculate various counts statistics\n"); fprintf(fp,"\t-doAsso\t\tPerform association study\n"); fprintf(fp,"\t-doMaf\t\tEstimate allele frequencies\n"); fprintf(fp,"\t-doError\tEstimate the type specific error rates\n"); fprintf(fp,"\t-doAncError\tEstimate the errorrate based on perfect fastas\n"); fprintf(fp,"\t-HWE_pval\t\tEst inbreedning per site or use as filter\n"); fprintf(fp,"\t-doGeno\t\tCall genotypes\n"); fprintf(fp,"\t-doFasta\tGenerate a fasta for a BAM file\n"); fprintf(fp,"\t-doAbbababa\tPerform an ABBA-BABA test\n"); fprintf(fp,"\t-sites\t\tAnalyse specific sites (can force major/minor)\n"); fprintf(fp,"\t-doSaf\t\tEstimate the SFS and/or neutrality tests genotype calling\n"); fprintf(fp,"\t-doHetPlas\tEstimate hetplasmy by calculating a pooled haploid frequency\n"); fprintf(fp,"\n\tBelow are options that can be usefull\n"); fprintf(fp,"\t-bam\t\tOptions relating to bam reading\n\t-doMajorMinor\tInfer the major/minor using different approaches\n"); fprintf(fp,"\t-ref/-anc\tRead reference or ancestral genome\n"); fprintf(fp,"\t-doSNPstat\tCalculate various SNPstat\n"); fprintf(fp,"\tmany others\n\n"); fprintf(fp,"For information of specific options type: \n\t./angsd METHODNAME eg \n\t\t./angsd -GL\n\t\t./angsd -doMaf\n\t\t./angsd -doAsso etc\n"); fprintf(fp,"\t\t./angsd sites for information about indexing -sites files\n"); fprintf(fp,"Examples:\n\tEstimate MAF for bam files in 'list'\n\t\t\'./angsd -bam list -GL 2 -doMaf 2 -out RES -doMajorMinor 1\'\n"); }
/*
 * Print the tabix version and command-line help to stderr.
 * Always returns 1 so that callers can write "return usage();".
 */
static int usage(void)
{
    fprintf(stderr,
        "\n"
        "Version: %s\n"
        "Usage: tabix [OPTIONS] [FILE] [REGION [...]]\n"
        "\n"
        "Indexing Options:\n"
        " -0, --zero-based coordinates are zero-based\n"
        " -b, --begin INT column number for region start [4]\n"
        " -c, --comment CHAR skip comment lines starting with CHAR [null]\n"
        " -C, --csi generate CSI index for VCF (default is TBI)\n"
        " -e, --end INT column number for region end (if no end, set INT to -b) [5]\n"
        " -f, --force overwrite existing index without asking\n"
        " -m, --min-shift INT set minimal interval size for CSI indices to 2^INT [14]\n"
        " -p, --preset STR gff, bed, sam, vcf\n"
        " -s, --sequence INT column number for sequence names (suppressed by -p) [1]\n"
        " -S, --skip-lines INT skip first INT lines [0]\n"
        "\n"
        "Querying and other options:\n"
        " -h, --print-header print also the header lines\n"
        " -H, --only-header print only the header lines\n"
        " -l, --list-chroms list chromosome names\n"
        " -r, --reheader FILE replace the header with the content of FILE\n"
        " -R, --regions FILE restrict to regions listed in the file\n"
        " -T, --targets FILE similar to -R but streams rather than index-jumps\n"
        "\n",
        hts_version());
    return 1;
}
int PLUGIN_GLOBAL(hfile_plugin_init,_libcurl)(struct hFILE_plugin *self) { static const struct hFILE_scheme_handler handler = { hopen_libcurl, hfile_always_remote, "libcurl", 50 }; const curl_version_info_data *info; const char * const *protocol; CURLcode err; err = curl_global_init(CURL_GLOBAL_ALL); if (err != CURLE_OK) { errno = easy_errno(NULL, err); return -1; } curl.multi = curl_multi_init(); if (curl.multi == NULL) { curl_global_cleanup(); errno = EIO; return -1; } info = curl_version_info(CURLVERSION_NOW); ksprintf(&curl.useragent, "htslib/%s libcurl/%s", hts_version(), info->version); curl.nrunning = 0; curl.perform_again = 0; self->name = "libcurl"; self->destroy = libcurl_exit; for (protocol = info->protocols; *protocol; protocol++) hfile_add_scheme_handler(*protocol, &handler); hfile_add_scheme_handler("s3", &handler); hfile_add_scheme_handler("s3+http", &handler); if (info->features & CURL_VERSION_SSL) hfile_add_scheme_handler("s3+https", &handler); return 0; }
static void usage(FILE *fp) { fprintf(fp, "\n"); fprintf(fp, "Program: bcftools (Tools for variant calling and manipulating VCFs and BCFs)\n"); #if USE_GPL fprintf(fp, "License: GNU GPLv3+, due to use of the GNU Scientific Library\n"); #endif fprintf(fp, "Version: %s (using htslib %s)\n", bcftools_version(), hts_version()); fprintf(fp, "\n"); fprintf(fp, "Usage: bcftools [--version|--version-only] [--help] <command> <argument>\n"); fprintf(fp, "\n"); fprintf(fp, "Commands:\n"); int i = 0; const char *sep = NULL; while (cmds[i].alias) { if ( !cmds[i].func ) sep = cmds[i].alias; if ( sep ) { fprintf(fp, "\n -- %s\n", sep); sep = NULL; } if ( cmds[i].func && cmds[i].help[0]!='-' ) fprintf(fp, " %-12s %s\n", cmds[i].alias, cmds[i].help); i++; } fprintf(fp,"\n"); fprintf(fp, " Most commands accept VCF, bgzipped VCF, and BCF with the file type detected\n" " automatically even when streaming from a pipe. Indexed VCF and BCF will work\n" " in all situations. Un-indexed VCF and BCF and streams will work in most but\n" " not all situations.\n"); fprintf(fp,"\n"); }
/*
 * Print the top-level samtools help (banner, versions and the grouped list
 * of commands) to fp.  On Windows builds an extra note about the
 * limitations of that port is appended.
 */
static void usage(FILE *fp)
{
    /* Please improve the grouping */
    static const char command_list[] =
        "Usage: samtools <command> [options]\n"
        "\n"
        "Commands:\n"
        " -- Indexing\n"
        " dict create a sequence dictionary file\n"
        " faidx index/extract FASTA\n"
        " index index alignment\n"
        "\n"
        " -- Editing\n"
        " calmd recalculate MD/NM tags and '=' bases\n"
        " fixmate fix mate information\n"
        " reheader replace BAM header\n"
        " rmdup remove PCR duplicates\n"
        " targetcut cut fosmid regions (for fosmid pool only)\n"
        " addreplacerg adds or replaces RG tags\n"
        "\n"
        " -- File operations\n"
        " collate shuffle and group alignments by name\n"
        " cat concatenate BAMs\n"
        " merge merge sorted alignments\n"
        " mpileup multi-way pileup\n"
        " sort sort alignment file\n"
        " split splits a file by read group\n"
        " quickcheck quickly check if SAM/BAM/CRAM file appears intact\n"
        " fastq converts a BAM to a FASTQ\n"
        " fasta converts a BAM to a FASTA\n"
        "\n"
        " -- Statistics\n"
        " bedcov read depth per BED region\n"
        " depth compute the depth\n"
        " flagstat simple stats\n"
        " idxstats BAM index stats\n"
        " phase phase heterozygotes\n"
        " stats generate stats (former bamcheck)\n"
        "\n"
        " -- Viewing\n"
        " flags explain BAM flags\n"
        " tview text alignment viewer\n"
        " view SAM<->BAM<->CRAM conversion\n"
        " depad convert padded BAM to unpadded BAM\n"
        "\n";

    fprintf(fp,
            "\n"
            "Program: samtools (Tools for alignments in the SAM format)\n"
            "Version: %s (using htslib %s)\n\n",
            samtools_version(), hts_version());

    fputs(command_list, fp);

#ifdef _WIN32
    fputs("Note: The Windows version of SAMtools is mainly designed for read-only\n"
          " operations, such as viewing the alignments and generating the pileup.\n"
          " Binary files generated by the Windows version may be buggy.\n\n",
          fp);
#endif
}
/*
 * Print the PileOMeth banner, the program and HTSlib versions, and the list
 * of available sub-commands to stderr.
 */
void usage_main()
{
    fprintf(stderr,
        "PileOMeth: A tool for processing bisulfite sequencing alignments.\n"
        "Version: %s (using HTSlib version %s)\n"
        "Usage: PileOMeth <command> [options]\n\n"
        "Commands:\n"
        " mbias Determine the position-dependent methylation bias in a dataset,\n"
        " producing diagnostic SVG images.\n"
        " extract Extract methylation metrics from an alignment file in BAM/CRAM\n"
        " format.\n"
        " mergeContext Combine single Cytosine metrics from 'PileOMeth extract' into\n"
        " per-CpG/CHG metrics.\n",
        VERSION, hts_version());
}
static int irods_init() { kstring_t useragent = { 0, 0, NULL }; struct sigaction pipehandler; rErrMsg_t err; int ret, pipehandler_ret; if (hts_verbose >= 5) rodsLogLevel(hts_verbose); ret = getRodsEnv(&irods.env); if (ret < 0) goto error; // Set iRODS User-Agent, if our caller hasn't already done so. kputs("htslib/", &useragent); kputs(hts_version(), &useragent); (void) setenv(SP_OPTION, useragent.s, 0); free(useragent.s); // Prior to iRODS 4.1, rcConnect() (even if it fails) installs its own // SIGPIPE handler, which just prints a message and otherwise ignores the // signal. Most actual SIGPIPEs encountered will pertain to e.g. stdout // rather than iRODS's connection, so we save and restore the existing // state (by default, termination; or as already set by our caller). pipehandler_ret = sigaction(SIGPIPE, NULL, &pipehandler); irods.conn = rcConnect(irods.env.rodsHost, irods.env.rodsPort, irods.env.rodsUserName, irods.env.rodsZone, NO_RECONN, &err); if (pipehandler_ret == 0) sigaction(SIGPIPE, &pipehandler, NULL); if (irods.conn == NULL) { ret = err.status; goto error; } if (strcmp(irods.env.rodsUserName, PUBLIC_USER_NAME) != 0) { #if defined IRODS_VERSION_INTEGER && IRODS_VERSION_INTEGER >= 4000000 ret = clientLogin(irods.conn, NULL, NULL); #else ret = clientLogin(irods.conn); #endif if (ret != 0) goto error; } return 0; error: if (irods.conn) { (void) rcDisconnect(irods.conn); } irods.conn = NULL; set_errno(ret); return -1; }
int bcftools_main(int argc, char *argv[]) { if (argc < 2) { usage(bcftools_stderr); return 1; } if (strcmp(argv[1], "version") == 0 || strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-v") == 0) { fprintf(bcftools_stdout, "bcftools %s\nUsing htslib %s\nCopyright (C) 2016 Genome Research Ltd.\n", bcftools_version(), hts_version()); #if USE_GPL fprintf(bcftools_stdout, "License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\n"); #else fprintf(bcftools_stdout, "License Expat: The MIT/Expat license\n"); #endif fprintf(bcftools_stdout, "This is free software: you are free to change and redistribute it.\nThere is NO WARRANTY, to the extent permitted by law.\n"); return 0; } else if (strcmp(argv[1], "--version-only") == 0) { fprintf(bcftools_stdout, "%s+htslib-%s\n", bcftools_version(), hts_version()); return 0; } else if (strcmp(argv[1], "help") == 0 || strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-h") == 0) { if (argc == 2) { usage(bcftools_stdout); return 0; } // Otherwise change "bcftools help COMMAND [...]" to "bcftools COMMAND"; // main_xyz() functions by convention display the subcommand's usage // when invoked without any arguments. argv++; argc = 2; } else if ( argv[1][0]=='+' ) { // "bcftools plugin name" can be run as "bcftools +name" argv[1]++; argv[0] = "plugin"; argv--; argc++; } int i = 0; while (cmds[i].alias) { if (cmds[i].func && strcmp(argv[1],cmds[i].alias)==0) { return cmds[i].func(argc-1,argv+1); } i++; } fprintf(bcftools_stderr, "[E::%s] unrecognized command '%s'\n", __func__, argv[1]); return 1; }
/*
 * Print the bgzip version and option summary to stderr.
 * Always returns 1 so that callers can write "return bgzip_main_usage();".
 */
static int bgzip_main_usage(void)
{
    fprintf(stderr,
        "\n"
        "Version: %s\n"
        "Usage: bgzip [OPTIONS] [FILE] ...\n"
        "Options:\n"
        " -b, --offset INT decompress at virtual file pointer (0-based uncompressed offset)\n"
        " -c, --stdout write on standard output, keep original files unchanged\n"
        " -d, --decompress decompress\n"
        " -f, --force overwrite files without asking\n"
        " -h, --help give this help\n"
        " -i, --index compress and create BGZF index\n"
        " -I, --index-name FILE name of BGZF index file [file.gz.gzi]\n"
        " -r, --reindex (re)index compressed file\n"
        " -s, --size INT decompress INT bytes (uncompressed size)\n"
        "\n",
        hts_version());
    return 1;
}
/*
 * Run the plugin's init callback and compare the versions it reports with
 * the running bcftools and htslib versions.
 *
 * A negative return from the callback is reported through error().  Each
 * kind of version mismatch is warned about on stderr only once (tracked by
 * function-local static flags).  The callback's return value is added to
 * args->drop_header.
 */
static void init_plugin(args_t *args)
{
    static int bcftools_warning_done = 0, htslib_warning_done = 0;
    const char *plugin_bver, *plugin_hver;
    int rc;

    rc = args->plugin.init(args->plugin.argc, args->plugin.argv, args->hdr, args->hdr_out);
    if ( rc < 0 )
        error("The plugin exited with an error: %s\n", args->plugin.name);

    args->plugin.version(&plugin_bver, &plugin_hver);

    if ( strcmp(plugin_bver, bcftools_version()) != 0 && !bcftools_warning_done )
    {
        fprintf(stderr,
                "WARNING: bcftools version mismatch .. bcftools at %s, the plugin \"%s\" at %s\n",
                bcftools_version(), args->plugin.name, plugin_bver);
        bcftools_warning_done = 1;
    }

    if ( strcmp(plugin_hver, hts_version()) != 0 && !htslib_warning_done )
    {
        fprintf(stderr,
                "WARNING: htslib version mismatch .. bcftools at %s, the plugin \"%s\" at %s\n",
                hts_version(), args->plugin.name, plugin_hver);
        htslib_warning_done = 1;
    }

    args->drop_header += rc;
}
int main(int argc, char *argv[]) { int i; int ret = 0; int defaultPort = 0; int parentPid = 0; printf("Initialzing the server..\n"); #ifdef _WIN32 { WORD wVersionRequested; // requested version WinSock API WSADATA wsadata; // Windows Sockets API data int stat; wVersionRequested = 0x0101; stat = WSAStartup(wVersionRequested, &wsadata); if (stat != 0) { fprintf(stderr, "Winsock not found!\n"); return -1; } else if (LOBYTE(wsadata.wVersion) != 1 && HIBYTE(wsadata.wVersion) != 1) { fprintf(stderr, "WINSOCK.DLL does not support version 1.1\n"); WSACleanup(); return -1; } } #endif if (argc < 2 || (argc % 2) != 0) { fprintf(stderr, "** Warning: use the webhttrack frontend if available\n"); fprintf(stderr, "usage: %s [--port <port>] [--ppid parent-pid] <path-to-html-root-dir> [key value [key value]..]\n", argv[0]); fprintf(stderr, "example: %s /usr/share/httrack/\n", argv[0]); return 1; } /* init and launch */ hts_init(); htslang_init(); /* set general keys */ #ifdef HTS_ETCPATH smallserver_setkey("ETCPATH", HTS_ETCPATH); #endif #ifdef HTS_BINPATH smallserver_setkey("BINPATH", HTS_BINPATH); #endif #ifdef HTS_LIBPATH smallserver_setkey("LIBPATH", HTS_LIBPATH); #endif #ifdef HTS_PREFIX smallserver_setkey("PREFIX", HTS_PREFIX); #endif #ifdef HTS_HTTRACKCNF smallserver_setkey("HTTRACKCNF", HTS_HTTRACKCNF); #endif #ifdef HTS_HTTRACKDIR smallserver_setkey("HTTRACKDIR", HTS_HTTRACKDIR); #endif #ifdef HTS_INET6 smallserver_setkey("INET6", "1"); #endif #ifdef HTS_USEOPENSSL smallserver_setkey("USEOPENSSL", "1"); #endif #ifdef HTS_DLOPEN smallserver_setkey("DLOPEN", "1"); #endif #ifdef HTS_USESWF smallserver_setkey("USESWF", "1"); #endif #ifdef HTS_USEZLIB smallserver_setkey("USEZLIB", "1"); #endif #ifdef _WIN32 smallserver_setkey("WIN32", "1"); #endif smallserver_setkey("HTTRACK_VERSION", HTTRACK_VERSION); smallserver_setkey("HTTRACK_VERSIONID", HTTRACK_VERSIONID); smallserver_setkey("HTTRACK_AFF_VERSION", HTTRACK_AFF_VERSION); { char tmp[32]; sprintf(tmp, "%d", -1); 
smallserver_setkey("HTS_PLATFORM", tmp); } smallserver_setkey("HTTRACK_WEB", HTTRACK_WEB); /* Check version compatibility */ if (hts_sizeof_opt() != sizeof(httrackp)) { fprintf(stderr, "** CRITICAL: incompatible current httrack library version %s, expected version %s", hts_version(), HTTRACK_VERSIONID); smallserver_setkey("HTTRACK_INCOMPATIBLE_VERSIONID", hts_version()); } /* protected session-id */ { char buff[1024]; char digest[32 + 2]; srand((unsigned int) time(NULL)); sprintf(buff, "%d-%d", (int) time(NULL), (int) rand()); domd5mem(buff, strlen(buff), digest, 1); smallserver_setkey("sid", digest); smallserver_setkey("_sid", digest); } /* set commandline keys */ for(i = 2; i < argc; i += 2) { if (strcmp(argv[i], "--port") == 0 && i + 1 < argc) { if (sscanf(argv[i + 1], "%d", &defaultPort) != 1 || defaultPort < 0 || defaultPort >= 65535) { fprintf(stderr, "couldn't set the port number to %s\n", argv[i + 1]); return -1; } } else if (strcmp(argv[i], "--ppid") == 0 && i + 1 < argc) { if (sscanf(argv[i + 1], "%u", &parentPid) != 1) { fprintf(stderr, "couldn't set the parent PID to %s\n", argv[i + 1]); return -1; } } else if (i + 1 < argc) { smallserver_setkey(argv[i], argv[i + 1]); } else { fprintf(stderr, "Error in commandline!\n"); return -1; } } /* sigpipe */ #ifndef _WIN32 signal(SIGPIPE, htsweb_sig_brpipe); // broken pipe (write into non-opened socket) #endif /* pinger */ if (parentPid > 0) { hts_newthread(client_ping, (void *) (uintptr_t) parentPid); background_threads++; /* Do not wait for this thread! */ smallserver_setpinghandler(pingHandler, NULL); } /* launch */ ret = help_server(argv[1], defaultPort); htsthread_wait_n(background_threads - 1); hts_uninit(); #ifdef _WIN32 WSACleanup(); #endif return ret; }
/*
 * Write the provenance header to fp as '#'-prefixed comment lines: the
 * bcftools/htslib versions, the command line taken from args->argv, and
 * the working directory taken from args->cwd.
 */
static void print_header(args_t *args, FILE *fp)
{
    int k = 1;

    fprintf(fp,
            "# This file was produced by bcftools (%s+htslib-%s), the command line was:\n",
            bcftools_version(), hts_version());

    /* The sub-command name first, then the remaining arguments. */
    fprintf(fp, "# \t bcftools %s ", args->argv[0]);
    while ( k < args->argc )
    {
        fprintf(fp, " %s", args->argv[k]);
        k++;
    }

    fprintf(fp,
            "\n# and the working directory was:\n"
            "# \t %s\n#\n",
            args->cwd);
}
int main(int argc, char **argv) { int ret = 0; httrackp *opt; #ifdef _WIN32 { WORD wVersionRequested; // requested version WinSock API WSADATA wsadata; // Windows Sockets API data int stat; wVersionRequested = 0x0101; stat = WSAStartup(wVersionRequested, &wsadata); if (stat != 0) { printf("Winsock not found!\n"); return; } else if (LOBYTE(wsadata.wVersion) != 1 && HIBYTE(wsadata.wVersion) != 1) { printf("WINSOCK.DLL does not support version 1.1\n"); WSACleanup(); return; } } #endif signal_handlers(); hts_init(); // Check version compatibility if (hts_sizeof_opt() != sizeof(httrackp)) { fprintf(stderr, "incompatible current httrack library version %s, expected version %s", hts_version(), HTTRACK_VERSIONID); abortLog("incompatible httrack library version, please update both httrack and its library"); } opt = global_opt = hts_create_opt(); assert(opt->size_httrackp == sizeof(httrackp)); CHAIN_FUNCTION(opt, init, htsshow_init, NULL); CHAIN_FUNCTION(opt, uninit, htsshow_uninit, NULL); CHAIN_FUNCTION(opt, start, htsshow_start, NULL); CHAIN_FUNCTION(opt, end, htsshow_end, NULL); CHAIN_FUNCTION(opt, chopt, htsshow_chopt, NULL); CHAIN_FUNCTION(opt, preprocess, htsshow_preprocesshtml, NULL); CHAIN_FUNCTION(opt, postprocess, htsshow_postprocesshtml, NULL); CHAIN_FUNCTION(opt, check_html, htsshow_checkhtml, NULL); CHAIN_FUNCTION(opt, query, htsshow_query, NULL); CHAIN_FUNCTION(opt, query2, htsshow_query2, NULL); CHAIN_FUNCTION(opt, query3, htsshow_query3, NULL); CHAIN_FUNCTION(opt, loop, htsshow_loop, NULL); CHAIN_FUNCTION(opt, check_link, htsshow_check, NULL); CHAIN_FUNCTION(opt, check_mime, htsshow_check_mime, NULL); CHAIN_FUNCTION(opt, pause, htsshow_pause, NULL); CHAIN_FUNCTION(opt, filesave, htsshow_filesave, NULL); CHAIN_FUNCTION(opt, filesave2, htsshow_filesave2, NULL); CHAIN_FUNCTION(opt, linkdetected, htsshow_linkdetected, NULL); CHAIN_FUNCTION(opt, linkdetected2, htsshow_linkdetected2, NULL); CHAIN_FUNCTION(opt, xfrstatus, htsshow_xfrstatus, NULL); 
CHAIN_FUNCTION(opt, savename, htsshow_savename, NULL); CHAIN_FUNCTION(opt, sendhead, htsshow_sendheader, NULL); CHAIN_FUNCTION(opt, receivehead, htsshow_receiveheader, NULL); ret = hts_main2(argc, argv, opt); if (ret) { fprintf(stderr, "* %s\n", hts_errmsg(opt)); } global_opt = NULL; hts_free_opt(opt); htsthread_wait(); /* wait for pending threads */ hts_uninit(); #ifdef _WIN32 WSACleanup(); #endif return ret; }
int samtools_main(int argc, char *argv[]) { #ifdef _WIN32 setmode(fileno(stdout), O_BINARY); setmode(fileno(stdin), O_BINARY); #endif if (argc < 2) { usage(pysamerr); return 1; } if (strcmp(argv[1], "help") == 0 || strcmp(argv[1], "--help") == 0) { if (argc == 2) { usage(stdout); return 0; } // Otherwise change "samtools help COMMAND [...]" to "samtools COMMAND"; // main_xyz() functions by convention display the subcommand's usage // when invoked without any arguments. argv++; argc = 2; } int ret = 0; if (strcmp(argv[1], "view") == 0) ret = main_samview(argc-1, argv+1); else if (strcmp(argv[1], "import") == 0) ret = main_import(argc-1, argv+1); else if (strcmp(argv[1], "mpileup") == 0) ret = bam_mpileup(argc-1, argv+1); else if (strcmp(argv[1], "merge") == 0) ret = bam_merge(argc-1, argv+1); else if (strcmp(argv[1], "sort") == 0) ret = bam_sort(argc-1, argv+1); else if (strcmp(argv[1], "index") == 0) ret = bam_index(argc-1, argv+1); else if (strcmp(argv[1], "idxstats") == 0) ret = bam_idxstats(argc-1, argv+1); else if (strcmp(argv[1], "faidx") == 0) ret = faidx_main(argc-1, argv+1); else if (strcmp(argv[1], "dict") == 0) ret = dict_main(argc-1, argv+1); else if (strcmp(argv[1], "fixmate") == 0) ret = bam_mating(argc-1, argv+1); else if (strcmp(argv[1], "rmdup") == 0) ret = bam_rmdup(argc-1, argv+1); else if (strcmp(argv[1], "flagstat") == 0) ret = bam_flagstat(argc-1, argv+1); else if (strcmp(argv[1], "calmd") == 0) ret = bam_fillmd(argc-1, argv+1); else if (strcmp(argv[1], "fillmd") == 0) ret = bam_fillmd(argc-1, argv+1); else if (strcmp(argv[1], "reheader") == 0) ret = main_reheader(argc-1, argv+1); else if (strcmp(argv[1], "cat") == 0) ret = main_cat(argc-1, argv+1); else if (strcmp(argv[1], "targetcut") == 0) ret = main_cut_target(argc-1, argv+1); else if (strcmp(argv[1], "phase") == 0) ret = main_phase(argc-1, argv+1); else if (strcmp(argv[1], "depth") == 0) ret = main_depth(argc-1, argv+1); else if (strcmp(argv[1], "bam2fq") == 0 || strcmp(argv[1], "fastq") 
== 0 || strcmp(argv[1], "fasta") == 0) ret = main_bam2fq(argc-1, argv+1); else if (strcmp(argv[1], "pad2unpad") == 0) ret = main_pad2unpad(argc-1, argv+1); else if (strcmp(argv[1], "depad") == 0) ret = main_pad2unpad(argc-1, argv+1); else if (strcmp(argv[1], "bedcov") == 0) ret = main_bedcov(argc-1, argv+1); else if (strcmp(argv[1], "bamshuf") == 0) ret = main_bamshuf(argc-1, argv+1); else if (strcmp(argv[1], "collate") == 0) ret = main_bamshuf(argc-1, argv+1); else if (strcmp(argv[1], "stats") == 0) ret = main_stats(argc-1, argv+1); else if (strcmp(argv[1], "flags") == 0) ret = main_flags(argc-1, argv+1); else if (strcmp(argv[1], "split") == 0) ret = main_split(argc-1, argv+1); else if (strcmp(argv[1], "quickcheck") == 0) ret = main_quickcheck(argc-1, argv+1); else if (strcmp(argv[1], "addreplacerg") == 0) ret = main_addreplacerg(argc-1, argv+1); else if (strcmp(argv[1], "pileup") == 0) { fprintf(pysamerr, "[main] The `pileup' command has been removed. Please use `mpileup' instead.\n"); return 1; } else if (strcmp(argv[1], "tview") == 0) ret = bam_tview_main(argc-1, argv+1); else if (strcmp(argv[1], "--version") == 0) { printf( "samtools %s\n" "Using htslib %s\n" "Copyright (C) 2015 Genome Research Ltd.\n", samtools_version(), hts_version()); } else if (strcmp(argv[1], "--version-only") == 0) { printf("%s+htslib-%s\n", samtools_version(), hts_version()); } else { fprintf(pysamerr, "[main] unrecognized command '%s'\n", argv[1]); return 1; } return ret; }
int main(int argc, char *argv[]) { int c, detect = 1, min_shift = 0, is_force = 0, list_chroms = 0, do_csi = 0; tbx_conf_t conf = tbx_conf_gff; char *reheader = NULL; args_t args; memset(&args,0,sizeof(args_t)); static const struct option loptions[] = { {"help", no_argument, NULL, 2}, {"regions", required_argument, NULL, 'R'}, {"targets", required_argument, NULL, 'T'}, {"csi", no_argument, NULL, 'C'}, {"zero-based", no_argument, NULL, '0'}, {"print-header", no_argument, NULL, 'h'}, {"only-header", no_argument, NULL, 'H'}, {"begin", required_argument, NULL, 'b'}, {"comment", required_argument, NULL, 'c'}, {"end", required_argument, NULL, 'e'}, {"force", no_argument, NULL, 'f'}, {"preset", required_argument, NULL, 'p'}, {"sequence", required_argument, NULL, 's'}, {"skip-lines", required_argument, NULL, 'S'}, {"list-chroms", no_argument, NULL, 'l'}, {"reheader", required_argument, NULL, 'r'}, {"version", no_argument, NULL, 1}, {NULL, 0, NULL, 0} }; char *tmp; while ((c = getopt_long(argc, argv, "hH?0b:c:e:fm:p:s:S:lr:CR:T:", loptions,NULL)) >= 0) { switch (c) { case 'R': args.regions_fname = optarg; break; case 'T': args.targets_fname = optarg; break; case 'C': do_csi = 1; break; case 'r': reheader = optarg; break; case 'h': args.print_header = 1; break; case 'H': args.print_header = 1; args.header_only = 1; break; case 'l': list_chroms = 1; break; case '0': conf.preset |= TBX_UCSC; detect = 0; break; case 'b': conf.bc = strtol(optarg,&tmp,10); if ( *tmp ) error("Could not parse argument: -b %s\n", optarg); detect = 0; break; case 'e': conf.ec = strtol(optarg,&tmp,10); if ( *tmp ) error("Could not parse argument: -e %s\n", optarg); detect = 0; break; case 'c': conf.meta_char = *optarg; detect = 0; break; case 'f': is_force = 1; break; case 'm': min_shift = strtol(optarg,&tmp,10); if ( *tmp ) error("Could not parse argument: -m %s\n", optarg); break; case 'p': detect = 0; if (strcmp(optarg, "gff") == 0) conf = tbx_conf_gff; else if (strcmp(optarg, "bed") == 0) conf = 
tbx_conf_bed; else if (strcmp(optarg, "sam") == 0) conf = tbx_conf_sam; else if (strcmp(optarg, "vcf") == 0) conf = tbx_conf_vcf; else if (strcmp(optarg, "bcf") == 0) detect = 1; // bcf is autodetected, preset is not needed else if (strcmp(optarg, "bam") == 0) detect = 1; // same as bcf else error("The preset string not recognised: '%s'\n", optarg); break; case 's': conf.sc = strtol(optarg,&tmp,10); if ( *tmp ) error("Could not parse argument: -s %s\n", optarg); detect = 0; break; case 'S': conf.line_skip = strtol(optarg,&tmp,10); if ( *tmp ) error("Could not parse argument: -S %s\n", optarg); detect = 0; break; case 1: printf( "tabix (htslib) %s\n" "Copyright (C) 2017 Genome Research Ltd.\n", hts_version()); return EXIT_SUCCESS; default: return usage(); } } if ( optind==argc ) return usage(); if ( list_chroms ) return query_chroms(argv[optind]); if ( argc > optind+1 || args.header_only || args.regions_fname || args.targets_fname ) { int nregs = 0; char **regs = NULL; if ( !args.header_only ) regs = parse_regions(args.regions_fname, argv+optind+1, argc-optind-1, &nregs); return query_regions(&args, argv[optind], regs, nregs); } char *fname = argv[optind]; int ftype = file_type(fname); if ( detect ) // no preset given { if ( ftype==IS_GFF ) conf = tbx_conf_gff; else if ( ftype==IS_BED ) conf = tbx_conf_bed; else if ( ftype==IS_SAM ) conf = tbx_conf_sam; else if ( ftype==IS_VCF ) { conf = tbx_conf_vcf; if ( !min_shift && do_csi ) min_shift = 14; } else if ( ftype==IS_BCF ) { if ( !min_shift ) min_shift = 14; } else if ( ftype==IS_BAM ) { if ( !min_shift ) min_shift = 14; } } if ( do_csi ) { if ( !min_shift ) min_shift = 14; min_shift *= do_csi; // positive for CSIv2, negative for CSIv1 } if ( min_shift!=0 && !do_csi ) do_csi = 1; if ( reheader ) return reheader_file(fname, reheader, ftype, &conf); char *suffix = ".tbi"; if ( do_csi ) suffix = ".csi"; else if ( ftype==IS_BAM ) suffix = ".bai"; else if ( ftype==IS_CRAM ) suffix = ".crai"; char *idx_fname = 
calloc(strlen(fname) + 5, 1); strcat(strcpy(idx_fname, fname), suffix); struct stat stat_tbi, stat_file; if ( !is_force && stat(idx_fname, &stat_tbi)==0 ) { // Before complaining about existing index, check if the VCF file isn't // newer. This is a common source of errors, people tend not to notice // that tabix failed stat(fname, &stat_file); if ( stat_file.st_mtime <= stat_tbi.st_mtime ) error("[tabix] the index file exists. Please use '-f' to overwrite.\n"); } free(idx_fname); if ( ftype==IS_CRAM ) { if ( bam_index_build(fname, min_shift)!=0 ) error("bam_index_build failed: %s\n", fname); return 0; } else if ( do_csi ) { if ( ftype==IS_BCF ) { if ( bcf_index_build(fname, min_shift)!=0 ) error("bcf_index_build failed: %s\n", fname); return 0; } if ( ftype==IS_BAM ) { if ( bam_index_build(fname, min_shift)!=0 ) error("bam_index_build failed: %s\n", fname); return 0; } if ( tbx_index_build(fname, min_shift, &conf)!=0 ) error("tbx_index_build failed: %s\n", fname); return 0; } else // TBI index { if ( tbx_index_build(fname, min_shift, &conf) ) error("tbx_index_build failed: %s\n", fname); return 0; } return 0; }
int main(int argc, char **argv) { int c, compress, pstdout, is_forced, index = 0, rebgzip = 0, reindex = 0; BGZF *fp; void *buffer; long start, end, size; char *index_fname = NULL; int threads = 1; static const struct option loptions[] = { {"help", no_argument, NULL, 'h'}, {"offset", required_argument, NULL, 'b'}, {"stdout", no_argument, NULL, 'c'}, {"decompress", no_argument, NULL, 'd'}, {"force", no_argument, NULL, 'f'}, {"index", no_argument, NULL, 'i'}, {"index-name", required_argument, NULL, 'I'}, {"reindex", no_argument, NULL, 'r'}, {"rebgzip",no_argument,NULL,'g'}, {"size", required_argument, NULL, 's'}, {"threads", required_argument, NULL, '@'}, {"version", no_argument, NULL, 1}, {NULL, 0, NULL, 0} }; compress = 1; pstdout = 0; start = 0; size = -1; end = -1; is_forced = 0; while((c = getopt_long(argc, argv, "cdh?fb:@:s:iI:gr",loptions,NULL)) >= 0){ switch(c){ case 'd': compress = 0; break; case 'c': pstdout = 1; break; case 'b': start = atol(optarg); compress = 0; pstdout = 1; break; case 's': size = atol(optarg); pstdout = 1; break; case 'f': is_forced = 1; break; case 'i': index = 1; break; case 'I': index_fname = optarg; break; case 'g': rebgzip = 1; break; case 'r': reindex = 1; compress = 0; break; case '@': threads = atoi(optarg); break; case 1: printf( "bgzip (htslib) %s\n" "Copyright (C) 2017 Genome Research Ltd.\n", hts_version()); return EXIT_SUCCESS; case 'h': case '?': return bgzip_main_usage(); } } if (size >= 0) end = start + size; if (end >= 0 && end < start) { fprintf(stderr, "[bgzip] Illegal region: [%ld, %ld]\n", start, end); return 1; } if (compress == 1) { struct stat sbuf; int f_src = fileno(stdin); if ( argc>optind ) { if ( stat(argv[optind],&sbuf)<0 ) { fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]); return 1; } if ((f_src = open(argv[optind], O_RDONLY)) < 0) { fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]); return 1; } if (pstdout) fp = bgzf_open("-", "w"); else { char *name = 
malloc(strlen(argv[optind]) + 5); strcpy(name, argv[optind]); strcat(name, ".gz"); fp = bgzf_open(name, is_forced? "w" : "wx"); if (fp == NULL && errno == EEXIST && confirm_overwrite(name)) fp = bgzf_open(name, "w"); if (fp == NULL) { fprintf(stderr, "[bgzip] can't create %s: %s\n", name, strerror(errno)); free(name); return 1; } free(name); } } else if (!pstdout && isatty(fileno((FILE *)stdout)) ) return bgzip_main_usage(); else if ( index && !index_fname ) { fprintf(stderr, "[bgzip] Index file name expected when writing to stdout\n"); return 1; } else fp = bgzf_open("-", "w"); if ( index && rebgzip ) { fprintf(stderr, "[bgzip] Can't produce a index and rebgzip simultaneously\n"); return 1; } if ( rebgzip && !index_fname ) { fprintf(stderr, "[bgzip] Index file name expected when writing to stdout\n"); return 1; } if (threads > 1) bgzf_mt(fp, threads, 256); if ( index ) bgzf_index_build_init(fp); buffer = malloc(WINDOW_SIZE); #ifdef _WIN32 _setmode(f_src, O_BINARY); #endif if (rebgzip){ if ( bgzf_index_load(fp, index_fname, NULL) < 0 ) error("Could not load index: %s.gzi\n", argv[optind]); while ((c = read(f_src, buffer, WINDOW_SIZE)) > 0) if (bgzf_block_write(fp, buffer, c) < 0) error("Could not write %d bytes: Error %d\n", c, fp->errcode); } else { while ((c = read(f_src, buffer, WINDOW_SIZE)) > 0) if (bgzf_write(fp, buffer, c) < 0) error("Could not write %d bytes: Error %d\n", c, fp->errcode); } if ( index ) { if (index_fname) { if (bgzf_index_dump(fp, index_fname, NULL) < 0) error("Could not write index to '%s'\n", index_fname); } else { if (bgzf_index_dump(fp, argv[optind], ".gz.gzi") < 0) error("Could not write index to '%s.gz.gzi'", argv[optind]); } } if (bgzf_close(fp) < 0) error("Close failed: Error %d", fp->errcode); if (argc > optind && !pstdout) unlink(argv[optind]); free(buffer); close(f_src); return 0; } else if ( reindex ) { if ( argc>optind ) { fp = bgzf_open(argv[optind], "r"); if ( !fp ) error("[bgzip] Could not open file: %s\n", argv[optind]); } 
else { if ( !index_fname ) error("[bgzip] Index file name expected when reading from stdin\n"); fp = bgzf_open("-", "r"); if ( !fp ) error("[bgzip] Could not read from stdin: %s\n", strerror(errno)); } buffer = malloc(BGZF_BLOCK_SIZE); bgzf_index_build_init(fp); int ret; while ( (ret=bgzf_read(fp, buffer, BGZF_BLOCK_SIZE))>0 ) ; free(buffer); if ( ret<0 ) error("Is the file gzipped or bgzipped? The latter is required for indexing.\n"); if ( index_fname ) { if (bgzf_index_dump(fp, index_fname, NULL) < 0) error("Could not write index to '%s'\n", index_fname); } else { if (bgzf_index_dump(fp, argv[optind], ".gzi") < 0) error("Could not write index to '%s.gzi'\n", argv[optind]); } if ( bgzf_close(fp)<0 ) error("Close failed: Error %d\n",fp->errcode); return 0; } else { struct stat sbuf; int f_dst; if ( argc>optind ) { if ( stat(argv[optind],&sbuf)<0 ) { fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]); return 1; } char *name; int len = strlen(argv[optind]); if ( strcmp(argv[optind]+len-3,".gz") ) { fprintf(stderr, "[bgzip] %s: unknown suffix -- ignored\n", argv[optind]); return 1; } fp = bgzf_open(argv[optind], "r"); if (fp == NULL) { fprintf(stderr, "[bgzip] Could not open file: %s\n", argv[optind]); return 1; } if (pstdout) { f_dst = fileno(stdout); } else { const int wrflags = O_WRONLY | O_CREAT | O_TRUNC; name = strdup(argv[optind]); name[strlen(name) - 3] = '\0'; f_dst = open(name, is_forced? 
wrflags : wrflags|O_EXCL, 0666); if (f_dst < 0 && errno == EEXIST && confirm_overwrite(name)) f_dst = open(name, wrflags, 0666); if (f_dst < 0) { fprintf(stderr, "[bgzip] can't create %s: %s\n", name, strerror(errno)); free(name); return 1; } free(name); } } else if (!pstdout && isatty(fileno((FILE *)stdin)) ) return bgzip_main_usage(); else { f_dst = fileno(stdout); fp = bgzf_open("-", "r"); if (fp == NULL) { fprintf(stderr, "[bgzip] Could not read from stdin: %s\n", strerror(errno)); return 1; } } if (threads > 1) bgzf_mt(fp, threads, 256); buffer = malloc(WINDOW_SIZE); if ( start>0 ) { if ( bgzf_index_load(fp, argv[optind], ".gzi") < 0 ) error("Could not load index: %s.gzi\n", argv[optind]); if ( bgzf_useek(fp, start, SEEK_SET) < 0 ) error("Could not seek to %d-th (uncompressd) byte\n", start); } #ifdef _WIN32 _setmode(f_dst, O_BINARY); #endif while (1) { if (end < 0) c = bgzf_read(fp, buffer, WINDOW_SIZE); else c = bgzf_read(fp, buffer, (end - start > WINDOW_SIZE)? WINDOW_SIZE:(end - start)); if (c == 0) break; if (c < 0) error("Could not read %d bytes: Error %d\n", (end - start > WINDOW_SIZE)? WINDOW_SIZE:(end - start), fp->errcode); start += c; if ( write(f_dst, buffer, c) != c ) { #ifdef _WIN32 if (GetLastError() != ERROR_NO_DATA) #endif error("Could not write %d bytes\n", c); } if (end >= 0 && start >= end) break; } free(buffer); if (bgzf_close(fp) < 0) error("Close failed: Error %d\n",fp->errcode); if (!pstdout) unlink(argv[optind]); return 0; } }
multiReader::multiReader(int argc,char**argv){ gz=Z_NULL; myglf=NULL;myvcf=NULL;mpil=NULL;bglObj=NULL; nLines=50; fname=NULL; intName=1; minQ = MINQ; nInd =0; isSim =0; args=NULL; args = setArgStruct(argc,argv); fprintf(args->argumentFile,"\t-> Command: \n"); for(int i=0;i<argc;i++) fprintf(args->argumentFile,"%s ",argv[i]); // fprintf(args->argumentFile,"\n\n"); if(args->argumentFile!=stderr) fprintf(args->argumentFile,"\n\t-> angsd version: %s (htslib: %s) build(%s %s)\n",ANGSD_VERSION,hts_version(),__DATE__,__TIME__); void printTime(FILE *fp); printTime(args->argumentFile); //type = args->inputtype; if(args->argc==2) { if((!strcasecmp(args->argv[1],"-beagle")) || (!strcasecmp(args->argv[1],"-glf")) || (!strcasecmp(args->argv[1],"-glf3")) || (!strcasecmp(args->argv[1],"-pileup")) || (!strcasecmp(args->argv[1],"-vcf-GL")) || (!strcasecmp(args->argv[1],"-vcf-pl")) || (!strcasecmp(args->argv[1],"-glf10_text")) || (!strcasecmp(args->argv[1],"-vcf-GP"))) { printArg(stdout,args); exit(0); }else if ((!strcasecmp(args->argv[1],"-bam")) || (!strcasecmp(args->argv[1],"-b"))){ setArgsBam(args); exit(0); }else return; } getOptions(args); if(args->fai==NULL){ int printAndExit =0; switch(args->inputtype) { case INPUT_GLF: printAndExit=1; break; case INPUT_GLF10_TEXT: printAndExit=1; break; case INPUT_GLF3: printAndExit=1; break; case INPUT_BEAGLE: printAndExit=1; break; case INPUT_PILEUP: printAndExit=1; break; } if(printAndExit){ fprintf(stderr,"\t-> Must supply -fai file\n"); exit(0); } } if(args->fai){ if(!(args->hd=getHeadFromFai(args->fai))) exit(0); }else{ if(args->nams.size()==0){ fprintf(stderr,"\t-> Must choose inputfile -bam/-glf/-glf3/-pileup/-i/-vcf-gl/-vcf-gp/-vcf-pl/-glf10_text filename\n"); exit(0); } if(args->inputtype==INPUT_BAM){ htsFile *in=sam_open(args->nams[0],"r"); assert(in); args->hd= sam_hdr_read(in); hts_close(in); } } if(!(INPUT_VCF_GL||INPUT_VCF_GP)){ if(args->hd==NULL){ fprintf(stderr,"For non-bams you should include -fai arguments\n"); exit(0); } 
} if((args->inputtype==INPUT_PILEUP||args->inputtype==INPUT_GLF||args->inputtype==INPUT_GLF3||args->inputtype==INPUT_GLF10_TEXT)){ if(nInd==0){ fprintf(stderr,"\t-> Must supply -nInd when using -glf/-glf3/-pileup/-glf10_text files\n"); exit(0); } }else args->nInd = args->nams.size(); if(args->inputtype==INPUT_VCF_GP||args->inputtype==INPUT_VCF_GL){ if(args->regions.size()>1){ fprintf(stderr,"\t-> Only one region can be specified with using bcf (i doubt more is needed) will exit\n"); exit(0); }else if(args->regions.size()<=1){ myvcf = new vcfReader(args->infile,NULL); args->hd=bcf_hdr_2_bam_hdr_t(myvcf->hs); args->nInd = myvcf->hs->nsamples; } } //make args->hd revMap = buildRevTable(args->hd); args->revMap = revMap; setArgsBam(args); if(args->inputtype==INPUT_VCF_GL){ if(args->regions.size()==1){ char tmp[1024]; int start=args->regions[0].start; int stop=args->regions[0].stop; int ref=args->regions[0].refID; snprintf(tmp,1024,"%s:%d-%d",args->hd->target_name[ref],start+1,stop); // fprintf(stderr,"tmp:%s\n",tmp); // exit(0); myvcf->seek(tmp); } } if(fname==NULL) return; gz=Z_NULL; gz=gzopen(fname,"r"); if(gz==Z_NULL){ fprintf(stderr,"\t-> Problem opening file: \'%s\'\n",fname); exit(0); } switch(args->inputtype){ case INPUT_PILEUP:{ mpil = new mpileup(args->nInd,gz,args->revMap,minQ); break; } case INPUT_GLF:{ myglf = new glfReader(args->nInd,gz,10,isSim); break; } case INPUT_GLF3:{ isSim = 1; //Added by FGV on 22/02/2015: GLF3 is always simulated data until a better alternative can be found myglf = new glfReader(args->nInd,gz,3,isSim); break; } case INPUT_GLF10_TEXT:{ myglf_text = new glfReader_text(args->nInd,gz,args->revMap); break; } case INPUT_BEAGLE:{ bglObj = new beagle_reader(gz,args->revMap,intName,args->nInd); break; } default:{ break; } } if(args->inputtype==INPUT_VCF_GL||args->inputtype==INPUT_VCF_GL){ fprintf(stderr,"\t-> VCF still beta. Remember that\n"); fprintf(stderr,"\t 1. indels are are discarded\n"); fprintf(stderr,"\t 2. 
will use chrom, pos PL columns\n"); fprintf(stderr,"\t 3. GL tags are interpreted as log10 and are scaled to ln (NOT USED)\n"); fprintf(stderr,"\t 4. GP tags are interpreted directly as unscaled post probs (spec says phredscaled...) (NOT USED)\n"); fprintf(stderr,"\t 5. FILTER column is currently NOT used (not sure what concensus is)\n"); fprintf(stderr,"\t 6. -sites does NOT work with vcf input but -r does\n"); fprintf(stderr,"\t 7. vcffilereading is still BETA, please report strange behaviour\n"); } }
/*
 * Run the multi-file pileup described by `conf` and write the resulting
 * records to conf->output_fname (or standard output).
 *
 * Steps, in order:
 *   1. allocate per-file bookkeeping and, if regions were requested, build
 *      the region index and position the region iterator on the first region;
 *   2. open every input file, read its header, register it with the sample
 *      list (files without usable read groups are dropped from conf->files)
 *      and create the iterator for the first region;
 *   3. open the output, build and write the VCF/BCF header;
 *   4. initialise the caller and the pileup iterator, then process either
 *      each region in turn or the whole input;
 *   5. flush pending records and release everything allocated here.
 *
 * Returns 0; every failure prints to stderr and calls exit(EXIT_FAILURE).
 */
static int mpileup(mplp_conf_t *conf)
{
    if (conf->nfiles == 0) {
        fprintf(stderr,"[%s] no input file/data given\n", __func__);
        exit(EXIT_FAILURE);
    }

    mplp_ref_t mp_ref = MPLP_REF_INIT;
    conf->gplp = (mplp_pileup_t *) calloc(1,sizeof(mplp_pileup_t));
    conf->mplp_data = (mplp_aux_t**) calloc(conf->nfiles, sizeof(mplp_aux_t*));
    conf->plp = (const bam_pileup1_t**) calloc(conf->nfiles, sizeof(bam_pileup1_t*));
    conf->n_plp = (int*) calloc(conf->nfiles, sizeof(int));

    // Allow to run mpileup on multiple regions in one go. This comes at cost: the bai index
    // must be kept in the memory for the whole time which can be a problem with many bams.
    // Therefore if none or only one region is requested, we initialize the bam iterator as
    // before and free the index. Only when multiple regions are queried, we keep the index.
    int nregs = 0;
    if ( conf->reg_fname )
    {
        if ( conf->reg_is_file )
        {
            conf->reg = regidx_init(conf->reg_fname,NULL,NULL,0,NULL);
            if ( !conf->reg ) {
                fprintf(stderr,"Could not parse the regions: %s\n", conf->reg_fname);
                exit(EXIT_FAILURE);
            }
        }
        else
        {
            conf->reg = regidx_init(NULL,regidx_parse_reg,NULL,sizeof(char*),NULL);
            if ( regidx_insert_list(conf->reg,conf->reg_fname,',') !=0 ) {
                fprintf(stderr,"Could not parse the regions: %s\n", conf->reg_fname);
                exit(EXIT_FAILURE);
            }
        }
        nregs = regidx_nregs(conf->reg);
        conf->reg_itr = regitr_init(conf->reg);
        regitr_loop(conf->reg_itr);   // region iterator now positioned at the first region
    }

    // read the header of each file in the list and initialize data
    // beware: mpileup has always assumed that tid's are consistent in the headers, add sanity check at least!
    bam_hdr_t *hdr = NULL;      // header of first file in input list
    int i;
    for (i = 0; i < conf->nfiles; ++i) {
        bam_hdr_t *h_tmp;
        conf->mplp_data[i] = (mplp_aux_t*) calloc(1, sizeof(mplp_aux_t));
        conf->mplp_data[i]->fp = sam_open(conf->files[i], "rb");
        if ( !conf->mplp_data[i]->fp )
        {
            fprintf(stderr, "[%s] failed to open %s: %s\n", __func__, conf->files[i], strerror(errno));
            exit(EXIT_FAILURE);
        }
        if (hts_set_opt(conf->mplp_data[i]->fp, CRAM_OPT_DECODE_MD, 0)) {
            fprintf(stderr, "Failed to set CRAM_OPT_DECODE_MD value\n");
            exit(EXIT_FAILURE);
        }
        if (conf->fai_fname && hts_set_fai_filename(conf->mplp_data[i]->fp, conf->fai_fname) != 0) {
            fprintf(stderr, "[%s] failed to process %s: %s\n",
                    __func__, conf->fai_fname, strerror(errno));
            exit(EXIT_FAILURE);
        }
        conf->mplp_data[i]->conf = conf;
        conf->mplp_data[i]->ref = &mp_ref;
        h_tmp = sam_hdr_read(conf->mplp_data[i]->fp);
        if ( !h_tmp ) {
            fprintf(stderr,"[%s] fail to read the header of %s\n", __func__, conf->files[i]);
            exit(EXIT_FAILURE);
        }
        conf->mplp_data[i]->h = i ? hdr : h_tmp; // for j==0, "h" has not been set yet
        conf->mplp_data[i]->bam_id = bam_smpl_add_bam(conf->bsmpl,h_tmp->text,conf->files[i]);
        if ( conf->mplp_data[i]->bam_id<0 )
        {
            // no usable readgroups in this bam, it can be skipped:
            // release it and close the gap in conf->files so slot i is reused
            sam_close(conf->mplp_data[i]->fp);
            free(conf->mplp_data[i]);
            bam_hdr_destroy(h_tmp);
            free(conf->files[i]);
            if ( i+1<conf->nfiles ) memmove(&conf->files[i],&conf->files[i+1],sizeof(*conf->files)*(conf->nfiles-i-1));
            conf->nfiles--;
            i--;
            continue;
        }
        if (conf->reg) {
            hts_idx_t *idx = sam_index_load(conf->mplp_data[i]->fp, conf->files[i]);
            if (idx == NULL) {
                fprintf(stderr, "[%s] fail to load index for %s\n", __func__, conf->files[i]);
                exit(EXIT_FAILURE);
            }
            conf->buf.l = 0;
            ksprintf(&conf->buf,"%s:%u-%u",conf->reg_itr->seq,conf->reg_itr->beg+1,conf->reg_itr->end+1);
            conf->mplp_data[i]->iter = sam_itr_querys(idx, conf->mplp_data[i]->h, conf->buf.s);
            if ( !conf->mplp_data[i]->iter )
            {
                // Retry with the bare sequence name to tell a malformed
                // region apart from a sequence missing in this file.
                conf->mplp_data[i]->iter = sam_itr_querys(idx, conf->mplp_data[i]->h, conf->reg_itr->seq);
                if ( conf->mplp_data[i]->iter ) {
                    fprintf(stderr,"[E::%s] fail to parse region '%s'\n", __func__, conf->buf.s);
                    exit(EXIT_FAILURE);
                }
                fprintf(stderr,"[E::%s] the sequence \"%s\" not found: %s\n",__func__,conf->reg_itr->seq,conf->files[i]);
                exit(EXIT_FAILURE);
            }
            if ( nregs==1 ) // no need to keep the index in memory
               hts_idx_destroy(idx);
            else
                conf->mplp_data[i]->idx = idx;
        }

        if ( !hdr ) hdr = h_tmp; /* save the header of first file in list */
        else {
            // FIXME: check consistency between h and h_tmp
            bam_hdr_destroy(h_tmp);

            // we store only the first file's header; it's (alleged to be)
            // compatible with the i-th file's target_name lookup needs
            conf->mplp_data[i]->h = hdr;
        }
    }

    // Every file may have been skipped above; hdr is dereferenced below when
    // the contig lines are written, so stop here instead of crashing.
    if ( !hdr ) {
        fprintf(stderr,"[%s] no input file with usable read groups\n", __func__);
        exit(EXIT_FAILURE);
    }

    // allocate data storage proportionate to number of samples being studied sm->n
    bam_smpl_get_samples(conf->bsmpl, &conf->gplp->n);
    conf->gplp->n_plp = (int*) calloc(conf->gplp->n, sizeof(int));
    conf->gplp->m_plp = (int*) calloc(conf->gplp->n, sizeof(int));
    conf->gplp->plp = (bam_pileup1_t**) calloc(conf->gplp->n, sizeof(bam_pileup1_t*));

    fprintf(stderr, "[%s] %d samples in %d input files\n", __func__, conf->gplp->n, conf->nfiles);
    // write the VCF header
    conf->bcf_fp = hts_open(conf->output_fname?conf->output_fname:"-", hts_bcf_wmode(conf->output_type));
    if (conf->bcf_fp == NULL) {
        fprintf(stderr, "[%s] failed to write to %s: %s\n", __func__, conf->output_fname? conf->output_fname : "standard output", strerror(errno));
        exit(EXIT_FAILURE);
    }
    if ( conf->n_threads ) hts_set_threads(conf->bcf_fp, conf->n_threads);

    // BCF header creation
    conf->bcf_hdr = bcf_hdr_init("w");
    conf->buf.l = 0;

    if (conf->record_cmd_line)
    {
        ksprintf(&conf->buf, "##bcftoolsVersion=%s+htslib-%s\n",bcftools_version(),hts_version());
        bcf_hdr_append(conf->bcf_hdr, conf->buf.s);

        conf->buf.l = 0;
        ksprintf(&conf->buf, "##bcftoolsCommand=mpileup");
        for (i=1; i<conf->argc; i++) ksprintf(&conf->buf, " %s", conf->argv[i]);
        kputc('\n', &conf->buf);
        bcf_hdr_append(conf->bcf_hdr, conf->buf.s);
    }

    if (conf->fai_fname)
    {
        conf->buf.l = 0;
        ksprintf(&conf->buf, "##reference=file://%s\n", conf->fai_fname);
        bcf_hdr_append(conf->bcf_hdr, conf->buf.s);
    }

    // Translate BAM @SQ tags to BCF ##contig tags
    // todo: use/write new BAM header manipulation routines, fill also UR, M5
    for (i=0; i<hdr->n_targets; i++)
    {
        conf->buf.l = 0;
        ksprintf(&conf->buf, "##contig=<ID=%s,length=%d>", hdr->target_name[i], hdr->target_len[i]);
        bcf_hdr_append(conf->bcf_hdr, conf->buf.s);
    }
    conf->buf.l = 0;

    bcf_hdr_append(conf->bcf_hdr,"##ALT=<ID=*,Description=\"Represents allele(s) other than observed.\">");
    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=INDEL,Number=0,Type=Flag,Description=\"Indicates that the variant is an INDEL.\">");
    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=IDV,Number=1,Type=Integer,Description=\"Maximum number of reads supporting an indel\">");
    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=IMF,Number=1,Type=Float,Description=\"Maximum fraction of reads supporting an indel\">");
    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Raw read depth\">");
    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=VDB,Number=1,Type=Float,Description=\"Variant Distance Bias for filtering splice-site artefacts in RNA-seq data (bigger is better)\",Version=\"3\">");
    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=RPB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Read Position Bias (bigger is better)\">");
    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=MQB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality Bias (bigger is better)\">");
    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=BQB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Base Quality Bias (bigger is better)\">");
    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=MQSB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality vs Strand Bias (bigger is better)\">");
#if CDF_MWU_TESTS
    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=RPB2,Number=1,Type=Float,Description=\"Mann-Whitney U test of Read Position Bias [CDF] (bigger is better)\">");
    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=MQB2,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality Bias [CDF] (bigger is better)\">");
    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=BQB2,Number=1,Type=Float,Description=\"Mann-Whitney U test of Base Quality Bias [CDF] (bigger is better)\">");
    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=MQSB2,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality vs Strand Bias [CDF] (bigger is better)\">");
#endif
    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=SGB,Number=1,Type=Float,Description=\"Segregation based metric.\">");
    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=MQ0F,Number=1,Type=Float,Description=\"Fraction of MQ0 reads (smaller is better)\">");
    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=I16,Number=16,Type=Float,Description=\"Auxiliary tag used for calling, see description of bcf_callret1_t in bam2bcf.h\">");
    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=QS,Number=R,Type=Float,Description=\"Auxiliary tag used for calling\">");
    bcf_hdr_append(conf->bcf_hdr,"##FORMAT=<ID=PL,Number=G,Type=Integer,Description=\"List of Phred-scaled genotype likelihoods\">");
    // Optional tags: one header line per flag set in conf->fmt_flag
    if ( conf->fmt_flag&B2B_FMT_DP )
        bcf_hdr_append(conf->bcf_hdr,"##FORMAT=<ID=DP,Number=1,Type=Integer,Description=\"Number of high-quality bases\">");
    if ( conf->fmt_flag&B2B_FMT_DV )
        bcf_hdr_append(conf->bcf_hdr,"##FORMAT=<ID=DV,Number=1,Type=Integer,Description=\"Number of high-quality non-reference bases\">");
    if ( conf->fmt_flag&B2B_FMT_DPR )
        bcf_hdr_append(conf->bcf_hdr,"##FORMAT=<ID=DPR,Number=R,Type=Integer,Description=\"Number of high-quality bases observed for each allele\">");
    if ( conf->fmt_flag&B2B_INFO_DPR )
        bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=DPR,Number=R,Type=Integer,Description=\"Number of high-quality bases observed for each allele\">");
    if ( conf->fmt_flag&B2B_FMT_DP4 )
        bcf_hdr_append(conf->bcf_hdr,"##FORMAT=<ID=DP4,Number=4,Type=Integer,Description=\"Number of high-quality ref-fwd, ref-reverse, alt-fwd and alt-reverse bases\">");
    if ( conf->fmt_flag&B2B_FMT_SP )
        bcf_hdr_append(conf->bcf_hdr,"##FORMAT=<ID=SP,Number=1,Type=Integer,Description=\"Phred-scaled strand bias P-value\">");
    if ( conf->fmt_flag&B2B_FMT_AD )
        bcf_hdr_append(conf->bcf_hdr,"##FORMAT=<ID=AD,Number=R,Type=Integer,Description=\"Allelic depths\">");
    if ( conf->fmt_flag&B2B_FMT_ADF )
        bcf_hdr_append(conf->bcf_hdr,"##FORMAT=<ID=ADF,Number=R,Type=Integer,Description=\"Allelic depths on the forward strand\">");
    if ( conf->fmt_flag&B2B_FMT_ADR )
        bcf_hdr_append(conf->bcf_hdr,"##FORMAT=<ID=ADR,Number=R,Type=Integer,Description=\"Allelic depths on the reverse strand\">");
    if ( conf->fmt_flag&B2B_INFO_AD )
        bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=AD,Number=R,Type=Integer,Description=\"Total allelic depths\">");
    if ( conf->fmt_flag&B2B_INFO_ADF )
        bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=ADF,Number=R,Type=Integer,Description=\"Total allelic depths on the forward strand\">");
    if ( conf->fmt_flag&B2B_INFO_ADR )
        bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=ADR,Number=R,Type=Integer,Description=\"Total allelic depths on the reverse strand\">");
    if ( conf->gvcf )
        gvcf_update_header(conf->gvcf, conf->bcf_hdr);

    int nsmpl;
    const char **smpl = bam_smpl_get_samples(conf->bsmpl, &nsmpl);
    for (i=0; i<nsmpl; i++)
        bcf_hdr_add_sample(conf->bcf_hdr, smpl[i]);
    // A header that cannot be written makes all later records useless.
    if ( bcf_hdr_write(conf->bcf_fp, conf->bcf_hdr)!=0 ) {
        fprintf(stderr, "[%s] failed to write the header to %s\n", __func__, conf->output_fname? conf->output_fname : "standard output");
        exit(EXIT_FAILURE);
    }

    conf->bca = bcf_call_init(-1., conf->min_baseQ);
    conf->bcr = (bcf_callret1_t*) calloc(nsmpl, sizeof(bcf_callret1_t));
    conf->bca->openQ = conf->openQ, conf->bca->extQ = conf->extQ, conf->bca->tandemQ = conf->tandemQ;
    conf->bca->min_frac = conf->min_frac;
    conf->bca->min_support = conf->min_support;
    conf->bca->per_sample_flt = conf->flag & MPLP_PER_SAMPLE;

    conf->bc.bcf_hdr = conf->bcf_hdr;
    conf->bc.n  = nsmpl;
    conf->bc.PL = (int32_t*) malloc(15 * nsmpl * sizeof(*conf->bc.PL));
    if (conf->fmt_flag)
    {
        assert( sizeof(float)==sizeof(int32_t) );
        conf->bc.DP4 = (int32_t*) malloc(nsmpl * sizeof(int32_t) * 4);
        conf->bc.fmt_arr = (uint8_t*) malloc(nsmpl * sizeof(float)); // all fmt_flag fields, float and int32
        if ( conf->fmt_flag&(B2B_INFO_DPR|B2B_FMT_DPR|B2B_INFO_AD|B2B_INFO_ADF|B2B_INFO_ADR|B2B_FMT_AD|B2B_FMT_ADF|B2B_FMT_ADR) )
        {
            // first B2B_MAX_ALLELES fields for total numbers, the rest per-sample
            conf->bc.ADR = (int32_t*) malloc((nsmpl+1)*B2B_MAX_ALLELES*sizeof(int32_t));
            conf->bc.ADF = (int32_t*) malloc((nsmpl+1)*B2B_MAX_ALLELES*sizeof(int32_t));
            for (i=0; i<nsmpl; i++)
            {
                conf->bcr[i].ADR = conf->bc.ADR + (i+1)*B2B_MAX_ALLELES;
                conf->bcr[i].ADF = conf->bc.ADF + (i+1)*B2B_MAX_ALLELES;
            }
        }
    }

    // init mpileup
    conf->iter = bam_mplp_init(conf->nfiles, mplp_func, (void**)conf->mplp_data);
    if ( conf->flag & MPLP_SMART_OVERLAPS ) bam_mplp_init_overlaps(conf->iter);
    if ( (double)conf->max_depth * conf->nfiles > 1<<20)
        fprintf(stderr, "Warning: Potential memory hog, up to %.0fM reads in the pileup!\n", (double)conf->max_depth*conf->nfiles);
    if ( (double)conf->max_depth * conf->nfiles / nsmpl < 250 )
        fprintf(stderr, "Note: The maximum per-sample depth with -d %d is %.1fx\n", conf->max_depth,(double)conf->max_depth * conf->nfiles / nsmpl);
    bam_mplp_set_maxcnt(conf->iter, conf->max_depth);
    conf->max_indel_depth = conf->max_indel_depth * nsmpl;
    conf->bcf_rec = bcf_init1();
    bam_mplp_constructor(conf->iter, pileup_constructor);

    // Run mpileup for multiple regions
    if ( nregs )
    {
        int ireg = 0;
        do
        {
            // first region is already positioned
            if ( ireg++ > 0 )
            {
                conf->buf.l = 0;
                // Same +1 conversion as for the first region above; without
                // it the queried interval is shifted by one base.
                ksprintf(&conf->buf,"%s:%u-%u",conf->reg_itr->seq,conf->reg_itr->beg+1,conf->reg_itr->end+1);

                for (i=0; i<conf->nfiles; i++)
                {
                    hts_itr_destroy(conf->mplp_data[i]->iter);
                    conf->mplp_data[i]->iter = sam_itr_querys(conf->mplp_data[i]->idx, conf->mplp_data[i]->h, conf->buf.s);
                    if ( !conf->mplp_data[i]->iter )
                    {
                        conf->mplp_data[i]->iter = sam_itr_querys(conf->mplp_data[i]->idx, conf->mplp_data[i]->h, conf->reg_itr->seq);
                        if ( conf->mplp_data[i]->iter ) {
                            fprintf(stderr,"[E::%s] fail to parse region '%s'\n", __func__, conf->buf.s);
                            exit(EXIT_FAILURE);
                        }
                        fprintf(stderr,"[E::%s] the sequence \"%s\" not found: %s\n",__func__,conf->reg_itr->seq,conf->files[i]);
                        exit(EXIT_FAILURE);
                    }
                    bam_mplp_reset(conf->iter);
                }
            }
            mpileup_reg(conf,conf->reg_itr->beg,conf->reg_itr->end);
        }
        while ( regitr_loop(conf->reg_itr) );
    }
    else
        mpileup_reg(conf,0,0);

    flush_bcf_records(conf, conf->bcf_fp, conf->bcf_hdr, NULL);

    // clean up
    free(conf->bc.tmp.s);
    bcf_destroy1(conf->bcf_rec);
    if (conf->bcf_fp)
    {
        hts_close(conf->bcf_fp);
        bcf_hdr_destroy(conf->bcf_hdr);
        bcf_call_destroy(conf->bca);
        free(conf->bc.PL);
        free(conf->bc.DP4);
        free(conf->bc.ADR);
        free(conf->bc.ADF);
        free(conf->bc.fmt_arr);
        free(conf->bcr);
    }
    if ( conf->gvcf ) gvcf_destroy(conf->gvcf);
    free(conf->buf.s);
    for (i = 0; i < conf->gplp->n; ++i) free(conf->gplp->plp[i]);
    free(conf->gplp->plp); free(conf->gplp->n_plp); free(conf->gplp->m_plp); free(conf->gplp);
    bam_mplp_destroy(conf->iter);
    bam_hdr_destroy(hdr);
    for (i = 0; i < conf->nfiles; ++i) {
        // with a single region the index was already released in the open loop
        if ( nregs>1 ) hts_idx_destroy(conf->mplp_data[i]->idx);
        sam_close(conf->mplp_data[i]->fp);
        if ( conf->mplp_data[i]->iter) hts_itr_destroy(conf->mplp_data[i]->iter);
        free(conf->mplp_data[i]);
    }
    if ( conf->reg_itr ) regitr_destroy(conf->reg_itr);
    free(conf->mplp_data); free(conf->plp); free(conf->n_plp);
    free(mp_ref.ref[0]);
    free(mp_ref.ref[1]);
    return 0;
}
int main_plugin(int argc, char *argv[]) { int c; args_t *args = (args_t*) calloc(1,sizeof(args_t)); args->argc = argc; args->argv = argv; args->output_fname = "-"; args->output_type = FT_VCF; args->nplugin_paths = -1; int regions_is_file = 0, targets_is_file = 0, plist_only = 0, usage_only = 0, version_only = 0; if ( argc==1 ) usage(args); char *plugin_name = NULL; if ( argv[1][0]!='-' ) { plugin_name = argv[1]; argc--; argv++; } static struct option loptions[] = { {"version",0,0,'V'}, {"verbose",0,0,'v'}, {"help",0,0,'h'}, {"list-plugins",0,0,'l'}, {"output",1,0,'o'}, {"output-type",1,0,'O'}, {"include",1,0,'i'}, {"exclude",1,0,'e'}, {"regions",1,0,'r'}, {"regions-file",1,0,'R'}, {"targets",1,0,'t'}, {"targets-file",1,0,'T'}, {0,0,0,0} }; while ((c = getopt_long(argc, argv, "h?o:O:r:R:t:T:li:e:vV",loptions,NULL)) >= 0) { switch (c) { case 'V': version_only = 1; break; case 'v': args->verbose = 1; break; case 'o': args->output_fname = optarg; break; case 'O': switch (optarg[0]) { case 'b': args->output_type = FT_BCF_GZ; break; case 'u': args->output_type = FT_BCF; break; case 'z': args->output_type = FT_VCF_GZ; break; case 'v': args->output_type = FT_VCF; break; default: error("The output type \"%s\" not recognised\n", optarg); }; break; case 'e': args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break; case 'i': args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break; case 'r': args->regions_list = optarg; break; case 'R': args->regions_list = optarg; regions_is_file = 1; break; case 't': args->targets_list = optarg; break; case 'T': args->targets_list = optarg; targets_is_file = 1; break; case 'l': plist_only = 1; break; case '?': case 'h': usage_only = 1; break; default: error("Unknown argument: %s\n", optarg); } } if ( plist_only ) return list_plugins(args); if ( usage_only && ! 
plugin_name ) usage(args); load_plugin(args, plugin_name, 1, &args->plugin); if ( version_only ) { const char *bver, *hver; args->plugin.version(&bver, &hver); printf("bcftools %s using htslib %s\n", bcftools_version(), hts_version()); printf("plugin at %s using htslib %s\n\n", bver, hver); return 0; } if ( usage_only ) { if ( args->plugin.usage ) fprintf(stderr,"%s",args->plugin.usage()); else fprintf(stderr,"Usage: bcftools +%s [General Options] -- [Plugin Options]\n",plugin_name); return 0; } if ( args->plugin.run ) { int iopt = optind; optind = 0; int ret = args->plugin.run(argc-iopt, argv+iopt); destroy_data(args); free(args); return ret; } char *fname = NULL; if ( optind>=argc || argv[optind][0]=='-' ) { if ( !isatty(fileno((FILE *)stdin)) ) fname = "-"; // reading from stdin else usage(args); args->plugin.argc = argc - optind + 1; args->plugin.argv = argv + optind - 1; } else { fname = argv[optind]; args->plugin.argc = argc - optind; args->plugin.argv = argv + optind; } optind = 0; args->files = bcf_sr_init(); if ( args->regions_list ) { if ( bcf_sr_set_regions(args->files, args->regions_list, regions_is_file)<0 ) error("Failed to read the regions: %s\n", args->regions_list); } if ( args->targets_list ) { if ( bcf_sr_set_targets(args->files, args->targets_list, targets_is_file, 0)<0 ) error("Failed to read the targets: %s\n", args->targets_list); args->files->collapse |= COLLAPSE_SOME; } if ( !bcf_sr_add_reader(args->files, fname) ) error("Failed to open %s: %s\n", fname,bcf_sr_strerror(args->files->errnum)); init_data(args); while ( bcf_sr_next_line(args->files) ) { bcf1_t *line = bcf_sr_get_line(args->files,0); if ( args->filter ) { int pass = filter_test(args->filter, line, NULL); if ( args->filter_logic & FLT_EXCLUDE ) pass = pass ? 0 : 1; if ( !pass ) continue; } line = args->plugin.process(line); if ( line ) bcf_write1(args->out_fh, args->hdr_out, line); } destroy_data(args); bcf_sr_destroy(args->files); free(args); return 0; }
/* Return the version string reported by hts_version(). */
const char *panda_sam_version(void)
{
	const char *version = hts_version();

	return version;
}
/*
 * Prepare the run: pick the query sample, subset the header to the samples
 * needed, build the PL->probability table, set up the 2-state HMM and print
 * the output header to stdout.
 *
 * @param args  program state; args->files must already have one reader added.
 *
 * Terminates through error() when the VCF has no samples, when -s is required
 * but missing, when the sample list cannot be read or parsed, or when the
 * requested INFO tag does not exist.
 */
static void init_data(args_t *args)
{
    args->prev_rid = args->skip_rid = -1;
    args->hdr = args->files->readers[0].header;

    // Must come first: the default-sample branch below reads samples[0]
    if ( !bcf_hdr_nsamples(args->hdr) ) error("No samples in the VCF?\n");

    if ( !args->sample )
    {
        if ( bcf_hdr_nsamples(args->hdr)>1 ) error("Missing the option -s, --sample\n");
        args->sample = strdup(args->hdr->samples[0]);
    }

    // Set samples
    kstring_t str = {0,0,0};
    if ( args->estimate_AF && strcmp("-",args->estimate_AF) )
    {
        int i, n = 0;
        char **smpls = hts_readlist(args->estimate_AF, 1, &n);
        if ( !smpls ) error("Could not read the file: %s\n", args->estimate_AF);

        // Make sure the query sample is included
        for (i=0; i<n; i++)
            if ( !strcmp(args->sample,smpls[i]) ) break;

        // Add the query sample if not present (i==n means the search above found nothing)
        if ( i==n ) kputs(args->sample, &str);

        for (i=0; i<n; i++)
        {
            if ( str.l ) kputc(',', &str);
            kputs(smpls[i], &str);
            free(smpls[i]);
        }
        free(smpls);
    }
    else if ( !args->estimate_AF )
        kputs(args->sample, &str);

    // An empty list (estimate_AF is "-") leaves the header's sample set untouched
    if ( str.l )
    {
        int ret = bcf_hdr_set_samples(args->hdr, str.s, 0);
        if ( ret<0 ) error("Error parsing the list of samples: %s\n", str.s);
        else if ( ret>0 ) error("The %d-th sample not found in the VCF\n", ret);
    }

    if ( args->af_tag )
        if ( !bcf_hdr_idinfo_exists(args->hdr,BCF_HL_INFO,bcf_hdr_id2int(args->hdr,BCF_DT_ID,args->af_tag)) )
            error("No such INFO tag in the VCF: %s\n", args->af_tag);

    args->nsmpl = bcf_hdr_nsamples(args->hdr);
    args->ismpl = bcf_hdr_id2int(args->hdr, BCF_DT_SAMPLE, args->sample);
    free(str.s);

    // Lookup table: phred-scaled value i -> 10^(-i/10)
    int i;
    for (i=0; i<256; i++) args->pl2p[i] = pow(10., -i/10.);

    // Init transition matrix and HMM
    double tprob[4];
    MAT(tprob,2,STATE_HW,STATE_HW) = 1 - args->t2AZ;
    MAT(tprob,2,STATE_HW,STATE_AZ) = args->t2HW;
    MAT(tprob,2,STATE_AZ,STATE_HW) = args->t2AZ;
    MAT(tprob,2,STATE_AZ,STATE_AZ) = 1 - args->t2HW;

    // With a genetic map or a recombination rate a callback adjusts the
    // transition probabilities; otherwise hmm_init gets 10000 as its last argument.
    if ( args->genmap_fname )
    {
        args->hmm = hmm_init(2, tprob, 0);
        hmm_set_tprob_func(args->hmm, set_tprob_genmap, args);
    }
    else if ( args->rec_rate > 0 )
    {
        args->hmm = hmm_init(2, tprob, 0);
        hmm_set_tprob_func(args->hmm, set_tprob_recrate, args);
    }
    else
        args->hmm = hmm_init(2, tprob, 10000);

    // print header
    printf("# This file was produced by: bcftools roh(%s+htslib-%s)\n", bcftools_version(),hts_version());
    printf("# The command line was:\tbcftools %s", args->argv[0]);
    for (i=1; i<args->argc; i++)
        printf(" %s",args->argv[i]);
    printf("\n#\n");
    printf("# [1]Chromosome\t[2]Position\t[3]State (0:HW, 1:AZ)\t[4]Quality\n");
}
/*
  Program entry point.

  Flow, as visible in this function:
   - warn on stderr (without aborting) when the zlib header version differs
     from the linked zlib version, then print the version banner;
   - no arguments, or a single --version/--help: print the overview and return 0;
   - first argument "sites": hand the remaining arguments to main_sites();
   - exactly one other argument: treat it as a request for a method synopsis;
     init() is expected to exit after printing it, otherwise the argument is
     reported as unknown and the process exits with status 0;
   - otherwise: build the reader, initialise the analyses and feed every chunk
     returned by fetch() to selector() until fetch() returns NULL or SIG_COND
     becomes false, then tear down, list output files and print timings.

  Returns 0 on every path that returns.
*/
int main(int argc, char** argv){
  // A zlib mismatch is reported but deliberately not fatal
  if(strcmp(ZLIB_VERSION,zlib_version)){
    fprintf(stderr,"\t-> Problem with difference in zlib version used for compiling and linking\n");
    fprintf(stderr,"\t-> ZLIB_VERSION: %s zlibversion: %s \n",ZLIB_VERSION,zlib_version);
  }
  fprintf(stderr,"\t-> angsd version: %s (htslib: %s) build(%s %s)\n",ANGSD_VERSION,hts_version(),__DATE__,__TIME__);

  // No arguments, or only --version/--help (case-insensitive): print the overview and exit
  if(argc==1||(argc==2&&(strcasecmp(argv[1],"--version")==0||strcasecmp(argv[1],"--help")==0))){
    printProgInfo(stderr);
    return 0;
  }

  if(!strcasecmp("sites",argv[1])){
    // main_sites comes from prep_sites.*; it is used for indexing -sites files.
    // It receives the argument vector shifted by one; its return value is ignored.
    int main_sites(int argc,char **argv);
    main_sites(--argc,++argv);
    return 0;
  }

  // Start the cpu-time and wall-time measurements reported at the end
  clock_t t=clock();
  time_t t2=time(NULL);

  // Initialize our signal handler for ctrl+c
  catchkill();

  argStruct *args=NULL;
  if(argc==2){
    fprintf(stderr,"\t-> Analysis helpbox/synopsis information:\n");
    multiReader mr(argc,argv);
    args = mr.getargs();
    init(args); // the program dies here after printing info, if a match is found
    fprintf(stderr,"\nUnknown argument supplied: \'%s\'\n\n",argv[1]);
    printProgInfo(stderr);
    exit(0); // important: otherwise the abc classes will try to clean up, which does not make sense in this context
  }

  multiReader *mr= new multiReader(argc,argv);
  args = mr->getargs();
  init(args);
  parseArgStruct(args);

  // Below is the main loop, which runs until there is no more data
  assert(args->hd);
  assert(args->revMap);
  int lastRefId=-1;
  while(SIG_COND) {
    funkyPars *tmp = mr->fetch(); // next chunk of data; NULL means no more data
    if(tmp==NULL)
      break;
    if(lastRefId==-1||lastRefId!=tmp->refId){
      lastRefId=tmp->refId;
      // changeChr is located in shared.cpp; it forces a change of chromosome when not using bam files
      void changeChr(int refId);
      changeChr(lastRefId);
    }
    selector(tmp);
  }

  // Now we have no more data: wait for outstanding work and clean up
  fprintf(stderr,"\t-> Done reading data waiting for calculations to finish\n");
  destroy_shared();

  // Print the generated filenames to stderr and to the argument file,
  // freeing each name once it has been reported
  fprintf(stderr,"\t-> Output filenames:\n");
  for(int i=0;i<(int)dumpedFiles.size();i++){
    fprintf(stderr,"\t\t->\"%s\"\n",dumpedFiles[i]);
    fprintf(args->argumentFile,"\t\t->\"%s\"\n",dumpedFiles[i]);
    free(dumpedFiles[i]);
  }
  fprintf(args->argumentFile,"\n");
  printTime(stderr);

  // Final summary, written to both stderr and the argument file
  fprintf(stderr,"\t-> Arguments and parameters for all analysis are located in .arg file\n");
  fprintf(stderr, "\t[ALL done] cpu-time used =  %.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
  fprintf(stderr, "\t[ALL done] walltime used =  %.2f sec\n", (float)(time(NULL) - t2));
  fprintf(args->argumentFile, "\t[ALL done] cpu-time used =  %.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
  fprintf(args->argumentFile, "\t[ALL done] walltime used =  %.2f sec\n", (float)(time(NULL) - t2));

  // TODO (from the original author): make the cleanup below better
  extern int *bamSortedIds;
  delete [] bamSortedIds;
  delete mr;
  return 0;
}
/*
 * Report version strings through the two output pointers:
 * BCFTOOLS_VERSION and whatever hts_version() returns.
 */
void version(const char **bcftools_version, const char **htslib_version)
{
    const char *compiled_bcftools = BCFTOOLS_VERSION;

    *bcftools_version = compiled_bcftools;
    *htslib_version   = hts_version();
}
//last is from abc.h void print_bcf_header(htsFile *fp,bcf_hdr_t *hdr,argStruct *args,kstring_t &buf,const bam_hdr_t *bhdr){ assert(args); ksprintf(&buf, "##angsdVersion=%s+htslib-%s\n",angsd_version(),hts_version()); bcf_hdr_append(hdr, buf.s); buf.l = 0; ksprintf(&buf, "##angsdCommand="); for (int i=1; i<args->argc; i++) ksprintf(&buf, " %s", args->argv[i]); kputc('\n', &buf); bcf_hdr_append(hdr, buf.s); buf.l=0; if(args->ref){ buf.l = 0; ksprintf(&buf, "##reference=file://%s\n", args->ref); bcf_hdr_append(hdr,buf.s); } if (args->anc){ buf.l = 0; ksprintf(&buf, "##ancestral=file://%s\n", args->anc); bcf_hdr_append(hdr,buf.s); } // Translate BAM @SQ tags to BCF ##contig tags // todo: use/write new BAM header manipulation routines, fill also UR, M5 for (int i=0; i<bhdr->n_targets; i++){ buf.l = 0; ksprintf(&buf, "##contig=<ID=%s,length=%d>", bhdr->target_name[i], bhdr->target_len[i]); bcf_hdr_append(hdr, buf.s); } buf.l = 0; bcf_hdr_append(hdr,"##ALT=<ID=*,Description=\"Represents allele(s) other than observed.\">"); bcf_hdr_append(hdr,"##INFO=<ID=INDEL,Number=0,Type=Flag,Description=\"Indicates that the variant is an INDEL.\">"); bcf_hdr_append(hdr,"##INFO=<ID=IDV,Number=1,Type=Integer,Description=\"Maximum number of raw reads supporting an indel\">"); bcf_hdr_append(hdr,"##INFO=<ID=IMF,Number=1,Type=Float,Description=\"Maximum fraction of raw reads supporting an indel\">"); bcf_hdr_append(hdr,"##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Raw read depth\">"); bcf_hdr_append(hdr,"##INFO=<ID=VDB,Number=1,Type=Float,Description=\"Variant Distance Bias for filtering splice-site artefacts in RNA-seq data (bigger is better)\",Version=\"3\">"); bcf_hdr_append(hdr,"##INFO=<ID=RPB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Read Position Bias (bigger is better)\">"); bcf_hdr_append(hdr,"##INFO=<ID=MQB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality Bias (bigger is better)\">"); 
bcf_hdr_append(hdr,"##INFO=<ID=BQB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Base Quality Bias (bigger is better)\">"); bcf_hdr_append(hdr,"##INFO=<ID=MQSB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality vs Strand Bias (bigger is better)\">"); bcf_hdr_append(hdr,"##INFO=<ID=NS,Number=1,Type=Integer,Description=\"Number of Samples With Data\">"); bcf_hdr_append(hdr,"##INFO=<ID=RPB2,Number=1,Type=Float,Description=\"Mann-Whitney U test of Read Position Bias [CDF] (bigger is better)\">"); bcf_hdr_append(hdr,"##INFO=<ID=MQB2,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality Bias [CDF] (bigger is better)\">"); bcf_hdr_append(hdr,"##INFO=<ID=BQB2,Number=1,Type=Float,Description=\"Mann-Whitney U test of Base Quality Bias [CDF] (bigger is better)\">"); bcf_hdr_append(hdr,"##INFO=<ID=MQSB2,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality vs Strand Bias [CDF] (bigger is better)\">"); bcf_hdr_append(hdr,"##INFO=<ID=SGB,Number=1,Type=Float,Description=\"Segregation based metric.\">"); bcf_hdr_append(hdr,"##INFO=<ID=MQ0F,Number=1,Type=Float,Description=\"Fraction of MQ0 reads (smaller is better)\">"); bcf_hdr_append(hdr,"##INFO=<ID=I16,Number=16,Type=Float,Description=\"Auxiliary tag used for calling, see description of bcf_callret1_t in bam2bcf.h\">"); bcf_hdr_append(hdr,"##INFO=<ID=QS,Number=R,Type=Float,Description=\"Auxiliary tag used for calling\">"); bcf_hdr_append(hdr,"##INFO=<ID=AF,Number=A,Type=Float,Description=\"Minor Allele Frequency\">"); bcf_hdr_append(hdr,"##INFO=<ID=DPR,Number=R,Type=Integer,Description=\"Number of high-quality bases observed for each allele\">"); bcf_hdr_append(hdr,"##INFO=<ID=AD,Number=R,Type=Integer,Description=\"Total allelic depths\">"); bcf_hdr_append(hdr,"##INFO=<ID=ADF,Number=R,Type=Integer,Description=\"Total allelic depths on the forward strand\">"); bcf_hdr_append(hdr,"##INFO=<ID=ADR,Number=R,Type=Integer,Description=\"Total allelic depths on the 
reverse strand\">"); bcf_hdr_append(hdr,"##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">"); bcf_hdr_append(hdr,"##FORMAT=<ID=DP4,Number=4,Type=Integer,Description=\"Number of high-quality ref-fwd, ref-reverse, alt-fwd and alt-reverse bases\">"); bcf_hdr_append(hdr,"##FORMAT=<ID=SP,Number=1,Type=Integer,Description=\"Phred-scaled strand bias P-value\">"); bcf_hdr_append(hdr,"##FORMAT=<ID=AD,Number=R,Type=Integer,Description=\"Allelic depths\">"); bcf_hdr_append(hdr,"##FORMAT=<ID=ADF,Number=R,Type=Integer,Description=\"Allelic depths on the forward strand\">"); bcf_hdr_append(hdr,"##FORMAT=<ID=ADR,Number=R,Type=Integer,Description=\"Allelic depths on the reverse strand\">"); bcf_hdr_append(hdr,"##FORMAT=<ID=PL,Number=G,Type=Integer,Description=\"List of Phred-scaled genotype likelihoods\">"); bcf_hdr_append(hdr,"##FORMAT=<ID=DP,Number=1,Type=Integer,Description=\"Number of high-quality bases\">"); bcf_hdr_append(hdr,"##FORMAT=<ID=DV,Number=1,Type=Integer,Description=\"Number of high-quality non-reference bases\">"); bcf_hdr_append(hdr,"##FORMAT=<ID=DPR,Number=R,Type=Integer,Description=\"Number of high-quality bases observed for each allele\">"); bcf_hdr_append(hdr,"##FORMAT=<ID=GL,Number=G,Type=Float,Description=\"scaled Genotype Likelihoods (loglikeratios to the most likely (in log10))\">"); //fprintf(stderr,"samples from sm:%d\n",args->sm->n); for(int i=0;i<args->sm->n;i++){ bcf_hdr_add_sample(hdr, args->sm->smpl[i]); } bcf_hdr_add_sample(hdr, NULL); // to update internal structures if ( bcf_hdr_write(fp, hdr)!=0 ) fprintf(stderr,"Failed to write bcf\n"); buf.l=0; }
/*
 * Read the input VCF once and prepare everything the fit needs:
 *   - open the file (honouring regions/targets) and restrict it to one sample;
 *   - histogram FORMAT/BAF into args->nbins bins, one distribution per chromosome;
 *   - run init_dist() on every distribution;
 *   - open <output_dir>/dist.dat and write its header;
 *   - write the executable plotting script <output_dir>/dist.py.
 *
 * @param args  program state; reads fname, regions/targets settings, sample,
 *              nbins, verbose, output_dir, argc/argv and fills xvals, dist,
 *              ndist, dat_fp, dat_fname.
 *
 * Terminates through error() on unreadable input, missing sample or BAF tag,
 * or allocation failure while growing the distribution array.
 */
static void init_data(args_t *args)
{
    bcf_srs_t *files = bcf_sr_init();
    if ( args->regions_list )
    {
        if ( bcf_sr_set_regions(files, args->regions_list, args->regions_is_file)<0 )
            error("Failed to read the regions: %s\n", args->regions_list);
    }
    if ( args->targets_list )
    {
        if ( bcf_sr_set_targets(files, args->targets_list, args->targets_is_file, 0)<0 )
            error("Failed to read the targets: %s\n", args->targets_list);
    }
    if ( !bcf_sr_add_reader(files, args->fname) ) error("Failed to open %s: %s\n", args->fname,bcf_sr_strerror(files->errnum));
    bcf_hdr_t *hdr = files->readers[0].header;

    if ( !args->sample )
    {
        // samples[0] is only valid when the VCF has at least one sample
        if ( !bcf_hdr_nsamples(hdr) ) error("No samples in the VCF: %s\n", args->fname);
        if ( bcf_hdr_nsamples(hdr)>1 ) error("Missing the option -s, --sample\n");
        // Take a private copy: the string belongs to the header, and the header
        // goes away with the reader at the end of this function.
        // NOTE(review): confirm whether args->sample is freed elsewhere.
        args->sample = strdup(hdr->samples[0]);
    }
    else if ( bcf_hdr_id2int(hdr,BCF_DT_SAMPLE,args->sample)<0 ) error("No such sample: %s\n", args->sample);

    int ret = bcf_hdr_set_samples(hdr, args->sample, 0);
    if ( ret<0 ) error("Error setting the sample: %s\n", args->sample);

    if ( !bcf_hdr_idinfo_exists(hdr,BCF_HL_FMT,bcf_hdr_id2int(hdr,BCF_DT_ID,"BAF")) )
        error("The tag FORMAT/BAF is not present in the VCF: %s\n", args->fname);

    // Bin positions, evenly spaced over [0,1]; shared by all distributions
    int i;
    args->xvals = (double*) calloc(args->nbins,sizeof(double));
    for (i=0; i<args->nbins; i++) args->xvals[i] = 1.0*i/(args->nbins-1);

    // collect BAF distributions for all chromosomes
    int idist = -1, nbaf = 0, prev_chr = -1;
    float *baf = NULL;
    while ( bcf_sr_next_line(files) )
    {
        bcf1_t *line = bcf_sr_get_line(files,0);
        if ( bcf_get_format_float(hdr,line,"BAF",&baf,&nbaf) != 1 ) continue;
        if ( bcf_float_is_missing(baf[0]) ) continue;

        // Values outside [0,1] (or NaN) would index outside yvals below
        if ( !(baf[0]>=0 && baf[0]<=1) ) continue;

        if ( prev_chr==-1 || prev_chr!=line->rid )
        {
            // new chromosome
            idist = args->ndist++;
            dist_t *grown = (dist_t*) realloc(args->dist, sizeof(dist_t)*args->ndist);
            if ( !grown ) error("Could not allocate memory for %d distributions\n", args->ndist);
            args->dist = grown;
            memset(&args->dist[idist],0,sizeof(dist_t));
            args->dist[idist].chr   = strdup(bcf_seqname(hdr,line));
            args->dist[idist].yvals = (double*) calloc(args->nbins,sizeof(double));
            args->dist[idist].xvals = args->xvals;
            args->dist[idist].nvals = args->nbins;
            prev_chr = line->rid;
        }
        int bin = baf[0]*(args->nbins-1);
        args->dist[idist].yvals[bin]++;   // the distribution
    }
    free(baf);
    bcf_sr_destroy(files);

    for (idist=0; idist<args->ndist; idist++)
    {
        #if 0
            int j;
            for (j=0; j<args->nbins; j++)
            {
                double x = args->dist[idist].xvals[j];
                args->dist[idist].yvals[j] = exp(-(x-0.5)*(x-0.5)/1e-3);
            }
        #endif
        init_dist(args, &args->dist[idist],args->verbose);
    }

    args->dat_fp = open_file(&args->dat_fname,"w","%s/dist.dat", args->output_dir);
    fprintf(args->dat_fp, "# This file was produced by: bcftools polysomy(%s+htslib-%s), the command line was:\n", bcftools_version(),hts_version());
    fprintf(args->dat_fp, "# \t bcftools %s ", args->argv[0]);
    for (i=1; i<args->argc; i++)
        fprintf(args->dat_fp, " %s",args->argv[i]);
    fprintf(args->dat_fp,"\n#\n");
    fprintf(args->dat_fp,"# DIST\t[2]Chrom\t[3]BAF\t[4]Normalized Count\n");
    fprintf(args->dat_fp,"# FIT\t[2]Goodness of Fit\t[3]iFrom\t[4]iTo\t[5]The Fitted Function\n");
    fprintf(args->dat_fp,"# CN\t[2]Chrom\t[3]Estimated Copy Number\t[4]Absolute fit deviation\n");

    char *fname = NULL;
    FILE *fp = open_file(&fname,"w","%s/dist.py", args->output_dir);
    //-------- matplotlib script --------------
    // The script is Python: block structure is carried by the leading spaces
    // inside the string literals, so they must reflect the nesting.
    fprintf(fp,
        "#!/usr/bin/env python\n"
        "#\n"
        "import matplotlib as mpl\n"
        "mpl.use('Agg')\n"
        "import matplotlib.pyplot as plt\n"
        "import csv,sys,argparse\n"
        "from math import exp\n"
        "\n"
        "outdir = '%s'\n"
        "\n"
        "def read_dat(dat,fit,cn):\n"
        "    csv.register_dialect('tab', delimiter='\t', quoting=csv.QUOTE_NONE)\n"
        "    with open(outdir+'/dist.dat', 'rb') as f:\n"
        "        reader = csv.reader(f, 'tab')\n"
        "        for row in reader:\n"
        "            if row[0][0]=='#': continue\n"
        "            type = row[0]\n"
        "            chr  = row[1]\n"
        "            if type=='DIST':\n"
        "                if chr not in dat: dat[chr] = []\n"
        "                dat[chr].append(row)\n"
        "            elif type=='FIT':\n"
        "                if chr not in fit: fit[chr] = []\n"
        "                fit[chr].append(row)\n"
        "            elif type=='CN':\n"
        "                cn[chr] = row[2]\n"
        "\n"
        "def plot_dist(dat,fit,chr):\n"
        "    fig, ax = plt.subplots(1, 1, figsize=(7,5))\n"
        "    ax.plot([x[2] for x in dat[chr]],[x[3] for x in dat[chr]],'k-',label='Distribution')\n"
        "    if chr in fit:\n"
        "        for i in range(len(fit[chr])):\n"
        "            pfit = fit[chr][i]\n"
        "            exec('def xfit(x): return '+pfit[5])\n"
        "            istart = int(pfit[3])\n"
        "            iend   = int(pfit[4])+1\n"
        "            vals   = dat[chr][istart:iend]\n"
        "            args   = {}\n"
        "            if i==0: args = {'label':'Target to Fit'}\n"
        "            ax.plot([x[2] for x in vals],[x[3] for x in vals],'r-',**args)\n"
        "            if i==0: args = {'label':'Best Fit'}\n"
        "            ax.plot([x[2] for x in vals],[xfit(float(x[2])) for x in vals],'g-',**args)\n"
        "    ax.set_title('BAF distribution, chr'+chr)\n"
        "    ax.set_xlabel('BAF')\n"
        "    ax.set_ylabel('Frequency')\n"
        "    ax.legend(loc='best',prop={'size':7},frameon=False)\n"
        "    plt.savefig(outdir+'/dist.chr'+chr+'.png')\n"
        "    plt.close()\n"
        "\n"
        "def plot_copy_number(cn):\n"
        "    fig, ax = plt.subplots(1, 1, figsize=(7,5))\n"
        "    xlabels = sorted(cn.keys())\n"
        "    xvals = range(len(xlabels))\n"
        "    yvals = [float(cn[x]) for x in xlabels]\n"
        "    ax.plot(xvals,yvals,'o',color='red')\n"
        "    for i in range(len(xvals)):\n"
        "        if yvals[i]==-1: ax.annotate('?', xy=(xvals[i],0.5),va='center',ha='center',color='red',fontweight='bold')\n"
        "    ax.tick_params(axis='both', which='major', labelsize=9)\n"
        "    ax.set_xticks(xvals)\n"
        "    ax.set_xticklabels(xlabels,rotation=45)\n"
        "    ax.set_xlim(-1,len(xlabels))\n"
        "    ax.set_ylim(0,5.0)\n"
        "    ax.set_yticks([1.0,2.0,3.0,4.0])\n"
        "    ax.set_xlabel('Chromosome')\n"
        "    ax.set_ylabel('Copy Number')\n"
        "    plt.savefig(outdir+'/copy-number.png')\n"
        "    plt.close()\n"
        "\n"
        "class myParser(argparse.ArgumentParser):\n"
        "    def error(self, message):\n"
        "        self.print_help()\n"
        "        sys.stderr.write('error: %%s\\n' %% message)\n"
        "        sys.exit(2)\n"
        "\n"
        "def main():\n"
        "    parser = myParser()\n"
        "    parser.add_argument('-a', '--all', action='store_true', help='Create all plots')\n"
        "    parser.add_argument('-c', '--copy-number', action='store_true', help='Create copy-number plot')\n"
        "    parser.add_argument('-d', '--distrib', metavar='CHR', help='Plot BAF distribution of a single chromosome')\n"
        "    args = parser.parse_args()\n"
        "    dat = {}; fit = {}; cn = {}\n"
        "    read_dat(dat,fit,cn)\n"
        "    if args.distrib!=None:\n"
        "        plot_dist(dat,fit,args.distrib)\n"
        "    if args.all:\n"
        "        for chr in dat: plot_dist(dat,fit,chr)\n"
        "        plot_copy_number(cn)\n"
        "    elif args.copy_number:\n"
        "        plot_copy_number(cn)\n"
        "    else:\n"
        "        for chr in dat: plot_dist(dat,fit,chr)\n"
        "\n"
        "if __name__ == '__main__':\n"
        "    main()\n",
        args->output_dir);
    //---------------------------------------

    // Make the script executable: rwx for the owner, r-x for group and others
    chmod(fname, S_IWUSR|S_IRUSR|S_IRGRP|S_IROTH|S_IXUSR|S_IXGRP|S_IXOTH);
    free(fname);
    fclose(fp);
}
/*
 * Performs pileup
 *
 * Opens every input file, optionally restricts each to conf->reg, and walks
 * all of them in parallel. Depending on conf->flag the result is written
 * either as VCF/BCF (MPLP_BCF) or as text pileup, to conf->output_fname or
 * standard output.
 *
 * @param conf configuration for this pileup
 * @param n number of files specified in fn
 * @param fn filenames
 * @return the last value returned by bam_mplp_auto(); the main loop runs
 *         while that value is positive
 *
 * Calls exit(1) when no input is given or when an input, index, region or
 * output cannot be opened or parsed.
 */
static int mpileup(mplp_conf_t *conf, int n, char **fn)
{
    extern void *bcf_call_add_rg(void *rghash, const char *hdtext, const char *list);
    extern void bcf_call_del_rghash(void *rghash);
    mplp_aux_t **data;
    // ref_len starts at 0 so it is never read indeterminate when no reference is loaded
    int i, tid, pos, *n_plp, tid0 = -1, beg0 = 0, end0 = 1u<<29, ref_len = 0, ref_tid = -1, max_depth, max_indel_depth;
    const bam_pileup1_t **plp;
    bam_mplp_t iter;
    bam_hdr_t *h = NULL; /* header of first file in input list */
    char *ref;
    void *rghash = NULL;
    FILE *pileup_fp = NULL;

    bcf_callaux_t *bca = NULL;
    bcf_callret1_t *bcr = NULL;
    bcf_call_t bc;
    htsFile *bcf_fp = NULL;
    bcf_hdr_t *bcf_hdr = NULL;

    bam_sample_t *sm = NULL;
    kstring_t buf;
    mplp_pileup_t gplp;

    memset(&gplp, 0, sizeof(mplp_pileup_t));
    memset(&buf, 0, sizeof(kstring_t));
    memset(&bc, 0, sizeof(bcf_call_t));
    data = calloc(n, sizeof(mplp_aux_t*));
    plp = calloc(n, sizeof(bam_pileup1_t*));
    n_plp = calloc(n, sizeof(int));
    sm = bam_smpl_init();

    if (n == 0) {
        fprintf(stderr,"[%s] no input file/data given\n", __func__);
        exit(1);
    }

    // read the header of each file in the list and initialize data
    for (i = 0; i < n; ++i) {
        bam_hdr_t *h_tmp;
        data[i] = calloc(1, sizeof(mplp_aux_t));
        data[i]->fp = sam_open(fn[i], "rb");
        if ( !data[i]->fp )
        {
            fprintf(stderr, "[%s] failed to open %s: %s\n", __func__, fn[i], strerror(errno));
            exit(1);
        }
        hts_set_fai_filename(data[i]->fp, conf->fai_fname);
        data[i]->conf = conf;
        h_tmp = sam_hdr_read(data[i]->fp);
        if ( !h_tmp ) {
            fprintf(stderr,"[%s] fail to read the header of %s\n", __func__, fn[i]);
            exit(1);
        }
        data[i]->h = i? h : h_tmp; // for i==0, "h" has not been set yet
        bam_smpl_add(sm, fn[i], (conf->flag&MPLP_IGNORE_RG)? 0 : h_tmp->text);
        // Collect read group IDs with PL (platform) listed in pl_list (note: fragile, strstr search)
        rghash = bcf_call_add_rg(rghash, h_tmp->text, conf->pl_list);
        if (conf->reg) {
            hts_idx_t *idx = sam_index_load(data[i]->fp, fn[i]);
            if (idx == 0) {
                fprintf(stderr, "[%s] fail to load index for %s\n", __func__, fn[i]);
                exit(1);
            }
            if ( (data[i]->iter=sam_itr_querys(idx, data[i]->h, conf->reg)) == 0) {
                fprintf(stderr, "[E::%s] fail to parse region '%s'\n", __func__, conf->reg);
                exit(1);
            }
            // The region bounds of the first file are used to filter all positions below
            if (i == 0) tid0 = data[i]->iter->tid, beg0 = data[i]->iter->beg, end0 = data[i]->iter->end;
            hts_idx_destroy(idx);
        }
        if (i == 0) h = h_tmp; /* save the header of first file in list */
        else {
            // FIXME: to check consistency
            bam_hdr_destroy(h_tmp);
        }
    }

    // allocate data storage proportionate to number of samples being studied sm->n
    gplp.n = sm->n;
    gplp.n_plp = calloc(sm->n, sizeof(int));
    gplp.m_plp = calloc(sm->n, sizeof(int));
    gplp.plp = calloc(sm->n, sizeof(bam_pileup1_t*));

    fprintf(stderr, "[%s] %d samples in %d input files\n", __func__, sm->n, n);

    // write the VCF header
    if (conf->flag & MPLP_BCF)
    {
        const char *mode;
        if ( conf->flag & MPLP_VCF )
            mode = (conf->flag&MPLP_NO_COMP)? "wu" : "wz";   // uncompressed VCF or compressed VCF
        else
            mode = (conf->flag&MPLP_NO_COMP)? "wub" : "wb";  // uncompressed BCF or compressed BCF

        bcf_fp = bcf_open(conf->output_fname? conf->output_fname : "-", mode);
        if (bcf_fp == NULL) {
            fprintf(stderr, "[%s] failed to write to %s: %s\n", __func__, conf->output_fname? conf->output_fname : "standard output", strerror(errno));
            exit(1);
        }

        bcf_hdr = bcf_hdr_init("w");
        kstring_t str = {0,0,0};

        ksprintf(&str, "##samtoolsVersion=%s+htslib-%s\n",samtools_version(),hts_version());
        bcf_hdr_append(bcf_hdr, str.s);

        str.l = 0;
        ksprintf(&str, "##samtoolsCommand=samtools mpileup");
        for (i=1; i<conf->argc; i++) ksprintf(&str, " %s", conf->argv[i]);
        kputc('\n', &str);
        bcf_hdr_append(bcf_hdr, str.s);

        if (conf->fai_fname)
        {
            str.l = 0;
            ksprintf(&str, "##reference=file://%s\n", conf->fai_fname);
            bcf_hdr_append(bcf_hdr, str.s);
        }

        // todo: use/write new BAM header manipulation routines, fill also UR, M5
        for (i=0; i<h->n_targets; i++)
        {
            str.l = 0;
            ksprintf(&str, "##contig=<ID=%s,length=%d>", h->target_name[i], h->target_len[i]);
            bcf_hdr_append(bcf_hdr, str.s);
        }
        free(str.s);
        bcf_hdr_append(bcf_hdr,"##ALT=<ID=X,Description=\"Represents allele(s) other than observed.\">");
        bcf_hdr_append(bcf_hdr,"##INFO=<ID=INDEL,Number=0,Type=Flag,Description=\"Indicates that the variant is an INDEL.\">");
        bcf_hdr_append(bcf_hdr,"##INFO=<ID=IDV,Number=1,Type=Integer,Description=\"Maximum number of reads supporting an indel\">");
        bcf_hdr_append(bcf_hdr,"##INFO=<ID=IMF,Number=1,Type=Float,Description=\"Maximum fraction of reads supporting an indel\">");
        bcf_hdr_append(bcf_hdr,"##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Raw read depth\">");
        bcf_hdr_append(bcf_hdr,"##INFO=<ID=VDB,Number=1,Type=Float,Description=\"Variant Distance Bias for filtering splice-site artefacts in RNA-seq data (bigger is better)\",Version=\"3\">");
        bcf_hdr_append(bcf_hdr,"##INFO=<ID=RPB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Read Position Bias (bigger is better)\">");
        bcf_hdr_append(bcf_hdr,"##INFO=<ID=MQB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality Bias (bigger is better)\">");
        bcf_hdr_append(bcf_hdr,"##INFO=<ID=BQB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Base Quality Bias (bigger is better)\">");
        bcf_hdr_append(bcf_hdr,"##INFO=<ID=MQSB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality vs Strand Bias (bigger is better)\">");
#if CDF_MWU_TESTS
        bcf_hdr_append(bcf_hdr,"##INFO=<ID=RPB2,Number=1,Type=Float,Description=\"Mann-Whitney U test of Read Position Bias [CDF] (bigger is better)\">");
        bcf_hdr_append(bcf_hdr,"##INFO=<ID=MQB2,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality Bias [CDF] (bigger is better)\">");
        bcf_hdr_append(bcf_hdr,"##INFO=<ID=BQB2,Number=1,Type=Float,Description=\"Mann-Whitney U test of Base Quality Bias [CDF] (bigger is better)\">");
        bcf_hdr_append(bcf_hdr,"##INFO=<ID=MQSB2,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality vs Strand Bias [CDF] (bigger is better)\">");
#endif
        bcf_hdr_append(bcf_hdr,"##INFO=<ID=SGB,Number=1,Type=Float,Description=\"Segregation based metric.\">");
        bcf_hdr_append(bcf_hdr,"##INFO=<ID=MQ0F,Number=1,Type=Float,Description=\"Fraction of MQ0 reads (smaller is better)\">");
        bcf_hdr_append(bcf_hdr,"##INFO=<ID=I16,Number=16,Type=Float,Description=\"Auxiliary tag used for calling, see description of bcf_callret1_t in bam2bcf.h\">");
        bcf_hdr_append(bcf_hdr,"##INFO=<ID=QS,Number=R,Type=Float,Description=\"Auxiliary tag used for calling\">");
        bcf_hdr_append(bcf_hdr,"##FORMAT=<ID=PL,Number=G,Type=Integer,Description=\"List of Phred-scaled genotype likelihoods\">");
        if ( conf->fmt_flag&B2B_FMT_DP )
            bcf_hdr_append(bcf_hdr,"##FORMAT=<ID=DP,Number=1,Type=Integer,Description=\"Number of high-quality bases\">");
        if ( conf->fmt_flag&B2B_FMT_DV )
            bcf_hdr_append(bcf_hdr,"##FORMAT=<ID=DV,Number=1,Type=Integer,Description=\"Number of high-quality non-reference bases\">");
        if ( conf->fmt_flag&B2B_FMT_DPR )
            bcf_hdr_append(bcf_hdr,"##FORMAT=<ID=DPR,Number=R,Type=Integer,Description=\"Number of high-quality bases observed for each allele\">");
        if ( conf->fmt_flag&B2B_INFO_DPR )
            bcf_hdr_append(bcf_hdr,"##INFO=<ID=DPR,Number=R,Type=Integer,Description=\"Number of high-quality bases observed for each allele\">");
        if ( conf->fmt_flag&B2B_FMT_DP4 )
            bcf_hdr_append(bcf_hdr,"##FORMAT=<ID=DP4,Number=4,Type=Integer,Description=\"Number of high-quality ref-fwd, ref-reverse, alt-fwd and alt-reverse bases\">");
        if ( conf->fmt_flag&B2B_FMT_SP )
            bcf_hdr_append(bcf_hdr,"##FORMAT=<ID=SP,Number=1,Type=Integer,Description=\"Phred-scaled strand bias P-value\">");

        for (i=0; i<sm->n; i++)
            bcf_hdr_add_sample(bcf_hdr, sm->smpl[i]);
        bcf_hdr_add_sample(bcf_hdr, NULL);
        bcf_hdr_write(bcf_fp, bcf_hdr);

        bca = bcf_call_init(-1., conf->min_baseQ);
        bcr = calloc(sm->n, sizeof(bcf_callret1_t));
        bca->rghash = rghash;
        bca->openQ = conf->openQ, bca->extQ = conf->extQ, bca->tandemQ = conf->tandemQ;
        bca->min_frac = conf->min_frac;
        bca->min_support = conf->min_support;
        bca->per_sample_flt = conf->flag & MPLP_PER_SAMPLE;

        bc.bcf_hdr = bcf_hdr;
        bc.n = sm->n;
        bc.PL = malloc(15 * sm->n * sizeof(*bc.PL));
        if (conf->fmt_flag)
        {
            assert( sizeof(float)==sizeof(int32_t) );
            bc.DP4 = malloc(sm->n * sizeof(int32_t) * 4);
            bc.fmt_arr = malloc(sm->n * sizeof(float)); // all fmt_flag fields
            if ( conf->fmt_flag&(B2B_INFO_DPR|B2B_FMT_DPR) )
            {
                // first B2B_MAX_ALLELES fields for total numbers, the rest per-sample
                bc.DPR = malloc((sm->n+1)*B2B_MAX_ALLELES*sizeof(int32_t));
                for (i=0; i<sm->n; i++)
                    bcr[i].DPR = bc.DPR + (i+1)*B2B_MAX_ALLELES;
            }
        }
    }
    else {
        pileup_fp = conf->output_fname? fopen(conf->output_fname, "w") : stdout;

        if (pileup_fp == NULL) {
            fprintf(stderr, "[%s] failed to write to %s: %s\n", __func__, conf->output_fname, strerror(errno));
            exit(1);
        }
    }

    if (tid0 >= 0 && conf->fai) { // region is set
        ref = faidx_fetch_seq(conf->fai, h->target_name[tid0], 0, 0x7fffffff, &ref_len);
        ref_tid = tid0;
        for (i = 0; i < n; ++i) data[i]->ref = ref, data[i]->ref_id = tid0;
    } else ref_tid = -1, ref = 0;

    // begin pileup
    iter = bam_mplp_init(n, mplp_func, (void**)data);
    if ( conf->flag & MPLP_SMART_OVERLAPS ) bam_mplp_init_overlaps(iter);
    max_depth = conf->max_depth;
    if (max_depth * sm->n > 1<<20)
        fprintf(stderr, "(%s) Max depth is above 1M. Potential memory hog!\n", __func__);
    if (max_depth * sm->n < 8000) {
        max_depth = 8000 / sm->n;
        fprintf(stderr, "<%s> Set max per-file depth to %d\n", __func__, max_depth);
    }
    max_indel_depth = conf->max_indel_depth * sm->n;
    bam_mplp_set_maxcnt(iter, max_depth);
    bcf1_t *bcf_rec = bcf_init1();
    int ret;
    while ( (ret=bam_mplp_auto(iter, &tid, &pos, n_plp, plp)) > 0) {
        if (conf->reg && (pos < beg0 || pos >= end0)) continue; // out of the region requested
        if (conf->bed && tid >= 0 && !bed_overlap(conf->bed, h->target_name[tid], pos, pos+1)) continue;
        if (tid != ref_tid) {
            // New reference sequence: drop the old one and load the next if a fasta is available
            free(ref); ref = 0;
            if (conf->fai) ref = faidx_fetch_seq(conf->fai, h->target_name[tid], 0, 0x7fffffff, &ref_len);
            for (i = 0; i < n; ++i) data[i]->ref = ref, data[i]->ref_id = tid;
            ref_tid = tid;
        }
        if (conf->flag & MPLP_BCF) {
            int total_depth, _ref0, ref16;
            for (i = total_depth = 0; i < n; ++i) total_depth += n_plp[i];
            group_smpl(&gplp, sm, &buf, n, fn, n_plp, plp, conf->flag & MPLP_IGNORE_RG);
            // Go through unsigned char so a byte >127 cannot become a negative table index
            _ref0 = (ref && pos < ref_len)? (unsigned char)ref[pos] : 'N';
            ref16 = seq_nt16_table[_ref0];
            bcf_callaux_clean(bca, &bc);
            for (i = 0; i < gplp.n; ++i)
                bcf_call_glfgen(gplp.n_plp[i], gplp.plp[i], ref16, bca, bcr + i);
            bc.tid = tid; bc.pos = pos;
            bcf_call_combine(gplp.n, bcr, bca, ref16, &bc);
            bcf_clear1(bcf_rec);
            bcf_call2bcf(&bc, bcf_rec, bcr, conf->fmt_flag, 0, 0);
            bcf_write1(bcf_fp, bcf_hdr, bcf_rec);
            // call indels; todo: subsampling with total_depth>max_indel_depth instead of ignoring?
            if (!(conf->flag&MPLP_NO_INDEL) && total_depth < max_indel_depth && bcf_call_gap_prep(gplp.n, gplp.n_plp, gplp.plp, pos, bca, ref, rghash) >= 0)
            {
                bcf_callaux_clean(bca, &bc);
                for (i = 0; i < gplp.n; ++i)
                    bcf_call_glfgen(gplp.n_plp[i], gplp.plp[i], -1, bca, bcr + i);
                if (bcf_call_combine(gplp.n, bcr, bca, -1, &bc) >= 0) {
                    bcf_clear1(bcf_rec);
                    bcf_call2bcf(&bc, bcf_rec, bcr, conf->fmt_flag, bca, ref);
                    bcf_write1(bcf_fp, bcf_hdr, bcf_rec);
                }
            }
        } else {
            // Text pileup: chromosome, 1-based position, reference base, then per-file columns
            fprintf(pileup_fp, "%s\t%d\t%c", h->target_name[tid], pos + 1, (ref && pos < ref_len)? ref[pos] : 'N');
            for (i = 0; i < n; ++i) {
                int j, cnt;
                // Depth counts only the reads that pass the base-quality filter
                for (j = cnt = 0; j < n_plp[i]; ++j) {
                    const bam_pileup1_t *p = plp[i] + j;
                    if (bam_get_qual(p->b)[p->qpos] >= conf->min_baseQ) ++cnt;
                }
                fprintf(pileup_fp, "\t%d\t", cnt);
                if (n_plp[i] == 0) {
                    fputs("*\t*", pileup_fp);
                    if (conf->flag & MPLP_PRINT_MAPQ) fputs("\t*", pileup_fp);
                    if (conf->flag & MPLP_PRINT_POS) fputs("\t*", pileup_fp);
                } else {
                    // bases
                    for (j = 0; j < n_plp[i]; ++j) {
                        const bam_pileup1_t *p = plp[i] + j;
                        if (bam_get_qual(p->b)[p->qpos] >= conf->min_baseQ)
                            pileup_seq(pileup_fp, plp[i] + j, pos, ref_len, ref);
                    }
                    putc('\t', pileup_fp);
                    // base qualities, printed as quality+33 and capped at 126
                    for (j = 0; j < n_plp[i]; ++j) {
                        const bam_pileup1_t *p = plp[i] + j;
                        int c = bam_get_qual(p->b)[p->qpos];
                        if (c >= conf->min_baseQ) {
                            c = c + 33 < 126? c + 33 : 126;
                            putc(c, pileup_fp);
                        }
                    }
                    if (conf->flag & MPLP_PRINT_MAPQ) {
                        putc('\t', pileup_fp);
                        for (j = 0; j < n_plp[i]; ++j) {
                            const bam_pileup1_t *p = plp[i] + j;
                            int c = bam_get_qual(p->b)[p->qpos];
                            if ( c < conf->min_baseQ ) continue;
                            c = plp[i][j].b->core.qual + 33;
                            if (c > 126) c = 126;
                            putc(c, pileup_fp);
                        }
                    }
                    if (conf->flag & MPLP_PRINT_POS) {
                        int n_printed = 0;
                        putc('\t', pileup_fp);
                        for (j = 0; j < n_plp[i]; ++j) {
                            const bam_pileup1_t *p = plp[i] + j;
                            // Same base-quality filter as the columns above, so the
                            // entries of all columns refer to the same reads
                            if (bam_get_qual(p->b)[p->qpos] < conf->min_baseQ) continue;
                            if (n_printed++) putc(',', pileup_fp);
                            fprintf(pileup_fp, "%d", p->qpos + 1); // FIXME: printf() is very slow...
                        }
                    }
                }
            }
            putc('\n', pileup_fp);
        }
    }

    // clean up
    free(bc.tmp.s);
    bcf_destroy1(bcf_rec);
    if (bcf_fp)
    {
        hts_close(bcf_fp);
        bcf_hdr_destroy(bcf_hdr);
        bcf_call_destroy(bca);
        free(bc.PL);
        free(bc.DP4);
        free(bc.DPR);
        free(bc.fmt_arr);
        free(bcr);
    }
    // stdout is not closed here; only a file opened by this function is
    if (pileup_fp && conf->output_fname) fclose(pileup_fp);
    bam_smpl_destroy(sm); free(buf.s);
    for (i = 0; i < gplp.n; ++i) free(gplp.plp[i]);
    free(gplp.plp); free(gplp.n_plp); free(gplp.m_plp);
    bcf_call_del_rghash(rghash);
    bam_mplp_destroy(iter);
    bam_hdr_destroy(h);
    for (i = 0; i < n; ++i) {
        sam_close(data[i]->fp);
        if (data[i]->iter) hts_itr_destroy(data[i]->iter);
        free(data[i]);
    }
    free(data); free(plp); free(ref); free(n_plp);
    return ret;
}