Exemplo n.º 1
0
char *bcftools_version(void)
{
    if ( !bcftools_version_string )
    {
        int len = strlen(hts_version()) + strlen(BCFTOOLS_VERSION) + 9;
        bcftools_version_string = (char*) malloc(len);
        snprintf(bcftools_version_string,len,"%s+htslib-%s", BCFTOOLS_VERSION,hts_version());
    }
    return bcftools_version_string;
}
Exemplo n.º 2
0
//print nice info
void printProgInfo(FILE *fp){
  fprintf(fp,"\n\t-> angsd version: %s (htslib: %s) build(%s %s)\n",ANGSD_VERSION,hts_version(),__DATE__,__TIME__); 
  fprintf(fp,"\t-> Please use the website \"http://www.popgen.dk/angsd\" as reference\n");
  fprintf(fp,"\t-> Use -nThreads or -P for number of threads allocated to the program\n"); 
  fprintf(fp,"Overview of methods:\n");
  fprintf(fp,"\t-GL\t\tEstimate genotype likelihoods\n");
  fprintf(fp,"\t-doCounts\tCalculate various counts statistics\n");
  fprintf(fp,"\t-doAsso\t\tPerform association study\n");
  fprintf(fp,"\t-doMaf\t\tEstimate allele frequencies\n");
  fprintf(fp,"\t-doError\tEstimate the type specific error rates\n");
  fprintf(fp,"\t-doAncError\tEstimate the errorrate based on perfect fastas\n");
  fprintf(fp,"\t-HWE_pval\t\tEst inbreedning per site or use as filter\n");
  fprintf(fp,"\t-doGeno\t\tCall genotypes\n");
  fprintf(fp,"\t-doFasta\tGenerate a fasta for a BAM file\n");
  fprintf(fp,"\t-doAbbababa\tPerform an ABBA-BABA test\n");
  fprintf(fp,"\t-sites\t\tAnalyse specific sites (can force major/minor)\n");
  fprintf(fp,"\t-doSaf\t\tEstimate the SFS and/or neutrality tests genotype calling\n");
  fprintf(fp,"\t-doHetPlas\tEstimate hetplasmy by calculating a pooled haploid frequency\n");
  fprintf(fp,"\n\tBelow are options that can be usefull\n");
  fprintf(fp,"\t-bam\t\tOptions relating to bam reading\n\t-doMajorMinor\tInfer the major/minor using different approaches\n");  
  fprintf(fp,"\t-ref/-anc\tRead reference or ancestral genome\n");
  fprintf(fp,"\t-doSNPstat\tCalculate various SNPstat\n");
  fprintf(fp,"\tmany others\n\n");
 
  fprintf(fp,"For information of specific options type: \n\t./angsd METHODNAME eg \n\t\t./angsd -GL\n\t\t./angsd -doMaf\n\t\t./angsd -doAsso etc\n");
  fprintf(fp,"\t\t./angsd sites for information about indexing -sites files\n");
  fprintf(fp,"Examples:\n\tEstimate MAF for bam files in 'list'\n\t\t\'./angsd -bam list -GL 2 -doMaf 2 -out RES -doMajorMinor 1\'\n");
  
}
Exemplo n.º 3
0
Arquivo: tabix.c Projeto: Illumina/akt
static int usage(void)
{
    fprintf(stderr, "\n");
    fprintf(stderr, "Version: %s\n", hts_version());
    fprintf(stderr, "Usage:   tabix [OPTIONS] [FILE] [REGION [...]]\n");
    fprintf(stderr, "\n");
    fprintf(stderr, "Indexing Options:\n");
    fprintf(stderr, "   -0, --zero-based           coordinates are zero-based\n");
    fprintf(stderr, "   -b, --begin INT            column number for region start [4]\n");
    fprintf(stderr, "   -c, --comment CHAR         skip comment lines starting with CHAR [null]\n");
    fprintf(stderr, "   -C, --csi                  generate CSI index for VCF (default is TBI)\n");
    fprintf(stderr, "   -e, --end INT              column number for region end (if no end, set INT to -b) [5]\n");
    fprintf(stderr, "   -f, --force                overwrite existing index without asking\n");
    fprintf(stderr, "   -m, --min-shift INT        set minimal interval size for CSI indices to 2^INT [14]\n");
    fprintf(stderr, "   -p, --preset STR           gff, bed, sam, vcf\n");
    fprintf(stderr, "   -s, --sequence INT         column number for sequence names (suppressed by -p) [1]\n");
    fprintf(stderr, "   -S, --skip-lines INT       skip first INT lines [0]\n");
    fprintf(stderr, "\n");
    fprintf(stderr, "Querying and other options:\n");
    fprintf(stderr, "   -h, --print-header         print also the header lines\n");
    fprintf(stderr, "   -H, --only-header          print only the header lines\n");
    fprintf(stderr, "   -l, --list-chroms          list chromosome names\n");
    fprintf(stderr, "   -r, --reheader FILE        replace the header with the content of FILE\n");
    fprintf(stderr, "   -R, --regions FILE         restrict to regions listed in the file\n");
    fprintf(stderr, "   -T, --targets FILE         similar to -R but streams rather than index-jumps\n");
    fprintf(stderr, "\n");
    return 1;
}
Exemplo n.º 4
0
int PLUGIN_GLOBAL(hfile_plugin_init,_libcurl)(struct hFILE_plugin *self)
{
    static const struct hFILE_scheme_handler handler =
        { hopen_libcurl, hfile_always_remote, "libcurl", 50 };

    const curl_version_info_data *info;
    const char * const *protocol;
    CURLcode err;

    err = curl_global_init(CURL_GLOBAL_ALL);
    if (err != CURLE_OK) { errno = easy_errno(NULL, err); return -1; }

    curl.multi = curl_multi_init();
    if (curl.multi == NULL) { curl_global_cleanup(); errno = EIO; return -1; }

    info = curl_version_info(CURLVERSION_NOW);
    ksprintf(&curl.useragent, "htslib/%s libcurl/%s",
             hts_version(), info->version);

    curl.nrunning = 0;
    curl.perform_again = 0;
    self->name = "libcurl";
    self->destroy = libcurl_exit;

    for (protocol = info->protocols; *protocol; protocol++)
        hfile_add_scheme_handler(*protocol, &handler);

    hfile_add_scheme_handler("s3", &handler);
    hfile_add_scheme_handler("s3+http", &handler);
    if (info->features & CURL_VERSION_SSL)
        hfile_add_scheme_handler("s3+https", &handler);

    return 0;
}
Exemplo n.º 5
0
static void usage(FILE *fp)
{
    fprintf(fp, "\n");
    fprintf(fp, "Program: bcftools (Tools for variant calling and manipulating VCFs and BCFs)\n");
#if USE_GPL
    fprintf(fp, "License: GNU GPLv3+, due to use of the GNU Scientific Library\n");
#endif
    fprintf(fp, "Version: %s (using htslib %s)\n", bcftools_version(), hts_version());
    fprintf(fp, "\n");
    fprintf(fp, "Usage:   bcftools [--version|--version-only] [--help] <command> <argument>\n");
    fprintf(fp, "\n");
    fprintf(fp, "Commands:\n");

    int i = 0;
    const char *sep = NULL;
    while (cmds[i].alias)
    {
        if ( !cmds[i].func ) sep = cmds[i].alias;
        if ( sep )
        {
            fprintf(fp, "\n -- %s\n", sep);
            sep = NULL;
        }
        if ( cmds[i].func && cmds[i].help[0]!='-' ) fprintf(fp, "    %-12s %s\n", cmds[i].alias, cmds[i].help);
        i++;
    }
    fprintf(fp,"\n");
    fprintf(fp,
            " Most commands accept VCF, bgzipped VCF, and BCF with the file type detected\n"
            " automatically even when streaming from a pipe. Indexed VCF and BCF will work\n"
            " in all situations. Un-indexed VCF and BCF and streams will work in most but\n"
            " not all situations.\n");
    fprintf(fp,"\n");
}
Exemplo n.º 6
0
static void usage(FILE *fp)
{
    /* Please improve the grouping */

    fprintf(fp,
"\n"
"Program: samtools (Tools for alignments in the SAM format)\n"
"Version: %s (using htslib %s)\n\n", samtools_version(), hts_version());
    fprintf(fp,
"Usage:   samtools <command> [options]\n"
"\n"
"Commands:\n"
"  -- Indexing\n"
"     dict           create a sequence dictionary file\n"
"     faidx          index/extract FASTA\n"
"     index          index alignment\n"
"\n"
"  -- Editing\n"
"     calmd          recalculate MD/NM tags and '=' bases\n"
"     fixmate        fix mate information\n"
"     reheader       replace BAM header\n"
"     rmdup          remove PCR duplicates\n"
"     targetcut      cut fosmid regions (for fosmid pool only)\n"
"     addreplacerg   adds or replaces RG tags\n"
"\n"
"  -- File operations\n"
"     collate        shuffle and group alignments by name\n"
"     cat            concatenate BAMs\n"
"     merge          merge sorted alignments\n"
"     mpileup        multi-way pileup\n"
"     sort           sort alignment file\n"
"     split          splits a file by read group\n"
"     quickcheck     quickly check if SAM/BAM/CRAM file appears intact\n"
"     fastq          converts a BAM to a FASTQ\n"
"     fasta          converts a BAM to a FASTA\n"
"\n"
"  -- Statistics\n"
"     bedcov         read depth per BED region\n"
"     depth          compute the depth\n"
"     flagstat       simple stats\n"
"     idxstats       BAM index stats\n"
"     phase          phase heterozygotes\n"
"     stats          generate stats (former bamcheck)\n"
"\n"
"  -- Viewing\n"
"     flags          explain BAM flags\n"
"     tview          text alignment viewer\n"
"     view           SAM<->BAM<->CRAM conversion\n"
"     depad          convert padded BAM to unpadded BAM\n"
"\n");
#ifdef _WIN32
    fprintf(fp,
"Note: The Windows version of SAMtools is mainly designed for read-only\n"
"      operations, such as viewing the alignments and generating the pileup.\n"
"      Binary files generated by the Windows version may be buggy.\n\n");
#endif
}
Exemplo n.º 7
0
void usage_main() {
    fprintf(stderr, "PileOMeth: A tool for processing bisulfite sequencing alignments.\n"
"Version: %s (using HTSlib version %s)\n", VERSION, hts_version());
    fprintf(stderr,
"Usage: PileOMeth <command> [options]\n\n"
"Commands:\n"
"    mbias    Determine the position-dependent methylation bias in a dataset,\n"
"             producing diagnostic SVG images.\n"
"    extract  Extract methylation metrics from an alignment file in BAM/CRAM\n"
"             format.\n"
"    mergeContext   Combine single Cytosine metrics from 'PileOMeth extract' into\n"
"             per-CpG/CHG metrics.\n"
);
}
Exemplo n.º 8
0
static int irods_init()
{
    kstring_t useragent = { 0, 0, NULL };
    struct sigaction pipehandler;
    rErrMsg_t err;
    int ret, pipehandler_ret;

    if (hts_verbose >= 5) rodsLogLevel(hts_verbose);

    ret = getRodsEnv(&irods.env);
    if (ret < 0) goto error;

    // Set iRODS User-Agent, if our caller hasn't already done so.
    kputs("htslib/", &useragent);
    kputs(hts_version(), &useragent);
    (void) setenv(SP_OPTION, useragent.s, 0);
    free(useragent.s);

    // Prior to iRODS 4.1, rcConnect() (even if it fails) installs its own
    // SIGPIPE handler, which just prints a message and otherwise ignores the
    // signal.  Most actual SIGPIPEs encountered will pertain to e.g. stdout
    // rather than iRODS's connection, so we save and restore the existing
    // state (by default, termination; or as already set by our caller).
    pipehandler_ret = sigaction(SIGPIPE, NULL, &pipehandler);

    irods.conn = rcConnect(irods.env.rodsHost, irods.env.rodsPort,
                           irods.env.rodsUserName, irods.env.rodsZone,
                           NO_RECONN, &err);
    if (pipehandler_ret == 0) sigaction(SIGPIPE, &pipehandler, NULL);
    if (irods.conn == NULL) { ret = err.status; goto error; }

    if (strcmp(irods.env.rodsUserName, PUBLIC_USER_NAME) != 0) {
#if defined IRODS_VERSION_INTEGER && IRODS_VERSION_INTEGER >= 4000000
        ret = clientLogin(irods.conn, NULL, NULL);
#else
        ret = clientLogin(irods.conn);
#endif
        if (ret != 0) goto error;
    }

    return 0;

error:
    if (irods.conn) { (void) rcDisconnect(irods.conn); }
    irods.conn = NULL;
    set_errno(ret);
    return -1;
}
Exemplo n.º 9
0
int bcftools_main(int argc, char *argv[])
{
    if (argc < 2) { usage(bcftools_stderr); return 1; }

    if (strcmp(argv[1], "version") == 0 || strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-v") == 0) {
        fprintf(bcftools_stdout, "bcftools %s\nUsing htslib %s\nCopyright (C) 2016 Genome Research Ltd.\n", bcftools_version(), hts_version());
#if USE_GPL
        fprintf(bcftools_stdout, "License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\n");
#else
        fprintf(bcftools_stdout, "License Expat: The MIT/Expat license\n");
#endif
        fprintf(bcftools_stdout, "This is free software: you are free to change and redistribute it.\nThere is NO WARRANTY, to the extent permitted by law.\n");
        return 0;
    }
    else if (strcmp(argv[1], "--version-only") == 0) {
        fprintf(bcftools_stdout, "%s+htslib-%s\n", bcftools_version(), hts_version());
        return 0;
    }
    else if (strcmp(argv[1], "help") == 0 || strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-h") == 0) {
        if (argc == 2) { usage(bcftools_stdout); return 0; }
        // Otherwise change "bcftools help COMMAND [...]" to "bcftools COMMAND";
        // main_xyz() functions by convention display the subcommand's usage
        // when invoked without any arguments.
        argv++;
        argc = 2;
    }
    else if ( argv[1][0]=='+' )
    {
        // "bcftools plugin name" can be run as "bcftools +name"
        argv[1]++;
        argv[0] = "plugin";
        argv--;
        argc++;
    }

    int i = 0;
    while (cmds[i].alias)
    {
        if (cmds[i].func && strcmp(argv[1],cmds[i].alias)==0)
        {
            return cmds[i].func(argc-1,argv+1);
        }
        i++;
    }
    fprintf(bcftools_stderr, "[E::%s] unrecognized command '%s'\n", __func__, argv[1]);
    return 1;
}
Exemplo n.º 10
0
Arquivo: bgzip.c Projeto: Abdul59/STAR
static int bgzip_main_usage(void)
{
	fprintf(stderr, "\n");
	fprintf(stderr, "Version: %s\n", hts_version());
	fprintf(stderr, "Usage:   bgzip [OPTIONS] [FILE] ...\n");
	fprintf(stderr, "Options:\n");
	fprintf(stderr, "   -b, --offset INT        decompress at virtual file pointer (0-based uncompressed offset)\n");
    fprintf(stderr, "   -c, --stdout            write on standard output, keep original files unchanged\n");
	fprintf(stderr, "   -d, --decompress        decompress\n");
	fprintf(stderr, "   -f, --force             overwrite files without asking\n");
	fprintf(stderr, "   -h, --help              give this help\n");
	fprintf(stderr, "   -i, --index             compress and create BGZF index\n");
	fprintf(stderr, "   -I, --index-name FILE   name of BGZF index file [file.gz.gzi]\n");
	fprintf(stderr, "   -r, --reindex           (re)index compressed file\n");
	fprintf(stderr, "   -s, --size INT          decompress INT bytes (uncompressed size)\n");
	fprintf(stderr, "\n");
	return 1;
}
Exemplo n.º 11
0
static void init_plugin(args_t *args)
{
    static int warned_bcftools = 0, warned_htslib = 0;

    int ret = args->plugin.init(args->plugin.argc,args->plugin.argv,args->hdr,args->hdr_out);
    if ( ret<0 ) error("The plugin exited with an error: %s\n", args->plugin.name);
    const char *bver, *hver;
    args->plugin.version(&bver, &hver);
    if ( strcmp(bver,bcftools_version()) && !warned_bcftools )
    {
        fprintf(stderr,"WARNING: bcftools version mismatch .. bcftools at %s, the plugin \"%s\" at %s\n", bcftools_version(),args->plugin.name,bver);
        warned_bcftools = 1;
    }
    if ( strcmp(hver,hts_version()) && !warned_htslib )
    {
        fprintf(stderr,"WARNING: htslib version mismatch .. bcftools at %s, the plugin \"%s\" at %s\n", hts_version(),args->plugin.name,hver);
        warned_htslib = 1;
    }
    args->drop_header += ret;
}
Exemplo n.º 12
0
int main(int argc, char *argv[]) {
  int i;
  int ret = 0;
  int defaultPort = 0;
  int parentPid = 0;

  printf("Initialzing the server..\n");

#ifdef _WIN32
  {
    WORD wVersionRequested;     // requested version WinSock API
    WSADATA wsadata;            // Windows Sockets API data
    int stat;

    wVersionRequested = 0x0101;
    stat = WSAStartup(wVersionRequested, &wsadata);
    if (stat != 0) {
      fprintf(stderr, "Winsock not found!\n");
      return -1;
    } else if (LOBYTE(wsadata.wVersion) != 1 && HIBYTE(wsadata.wVersion) != 1) {
      fprintf(stderr, "WINSOCK.DLL does not support version 1.1\n");
      WSACleanup();
      return -1;
    }
  }
#endif

  if (argc < 2 || (argc % 2) != 0) {
    fprintf(stderr, "** Warning: use the webhttrack frontend if available\n");
    fprintf(stderr,
            "usage: %s [--port <port>] [--ppid parent-pid] <path-to-html-root-dir> [key value [key value]..]\n",
            argv[0]);
    fprintf(stderr, "example: %s /usr/share/httrack/\n", argv[0]);
    return 1;
  }

  /* init and launch */
  hts_init();
  htslang_init();

  /* set general keys */
#ifdef HTS_ETCPATH
  smallserver_setkey("ETCPATH", HTS_ETCPATH);
#endif
#ifdef HTS_BINPATH
  smallserver_setkey("BINPATH", HTS_BINPATH);
#endif
#ifdef HTS_LIBPATH
  smallserver_setkey("LIBPATH", HTS_LIBPATH);
#endif
#ifdef HTS_PREFIX
  smallserver_setkey("PREFIX", HTS_PREFIX);
#endif
#ifdef HTS_HTTRACKCNF
  smallserver_setkey("HTTRACKCNF", HTS_HTTRACKCNF);
#endif
#ifdef HTS_HTTRACKDIR
  smallserver_setkey("HTTRACKDIR", HTS_HTTRACKDIR);
#endif
#ifdef HTS_INET6
  smallserver_setkey("INET6", "1");
#endif
#ifdef HTS_USEOPENSSL
  smallserver_setkey("USEOPENSSL", "1");
#endif
#ifdef HTS_DLOPEN
  smallserver_setkey("DLOPEN", "1");
#endif
#ifdef HTS_USESWF
  smallserver_setkey("USESWF", "1");
#endif
#ifdef HTS_USEZLIB
  smallserver_setkey("USEZLIB", "1");
#endif
#ifdef _WIN32
  smallserver_setkey("WIN32", "1");
#endif
  smallserver_setkey("HTTRACK_VERSION", HTTRACK_VERSION);
  smallserver_setkey("HTTRACK_VERSIONID", HTTRACK_VERSIONID);
  smallserver_setkey("HTTRACK_AFF_VERSION", HTTRACK_AFF_VERSION);
  {
    char tmp[32];

    sprintf(tmp, "%d", -1);
    smallserver_setkey("HTS_PLATFORM", tmp);
  }
  smallserver_setkey("HTTRACK_WEB", HTTRACK_WEB);

  /* Check version compatibility */
  if (hts_sizeof_opt() != sizeof(httrackp)) {
    fprintf(stderr,
      "** CRITICAL: incompatible current httrack library version %s, expected version %s",
      hts_version(), HTTRACK_VERSIONID);
    smallserver_setkey("HTTRACK_INCOMPATIBLE_VERSIONID", hts_version());
  }

  /* protected session-id */
  {
    char buff[1024];
    char digest[32 + 2];

    srand((unsigned int) time(NULL));
    sprintf(buff, "%d-%d", (int) time(NULL), (int) rand());
    domd5mem(buff, strlen(buff), digest, 1);
    smallserver_setkey("sid", digest);
    smallserver_setkey("_sid", digest);
  }

  /* set commandline keys */
  for(i = 2; i < argc; i += 2) {
    if (strcmp(argv[i], "--port") == 0 && i + 1 < argc) {
      if (sscanf(argv[i + 1], "%d", &defaultPort) != 1 || defaultPort < 0
          || defaultPort >= 65535) {
        fprintf(stderr, "couldn't set the port number to %s\n", argv[i + 1]);
        return -1;
      }
    } else if (strcmp(argv[i], "--ppid") == 0 && i + 1 < argc) {
      if (sscanf(argv[i + 1], "%u", &parentPid) != 1) {
        fprintf(stderr, "couldn't set the parent PID to %s\n", argv[i + 1]);
        return -1;
      }
    } else if (i + 1 < argc) {
      smallserver_setkey(argv[i], argv[i + 1]);
    } else {
      fprintf(stderr, "Error in commandline!\n");
      return -1;
    }
  }

  /* sigpipe */
#ifndef _WIN32
  signal(SIGPIPE, htsweb_sig_brpipe);   // broken pipe (write into non-opened socket)
#endif

  /* pinger */
  if (parentPid > 0) {
    hts_newthread(client_ping, (void *) (uintptr_t) parentPid);
    background_threads++; /* Do not wait for this thread! */
    smallserver_setpinghandler(pingHandler, NULL);
  }

  /* launch */
  ret = help_server(argv[1], defaultPort);

  htsthread_wait_n(background_threads - 1);
  hts_uninit();

#ifdef _WIN32
  WSACleanup();
#endif

  return ret;
}
Exemplo n.º 13
0
static void print_header(args_t *args, FILE *fp)
{
    fprintf(fp, "# This file was produced by bcftools (%s+htslib-%s), the command line was:\n", bcftools_version(), hts_version());
    fprintf(fp, "# \t bcftools %s ", args->argv[0]);
    int i;
    for (i=1; i<args->argc; i++)
        fprintf(fp, " %s",args->argv[i]);
    fprintf(fp, "\n# and the working directory was:\n");
    fprintf(fp, "# \t %s\n#\n", args->cwd);
}
Exemplo n.º 14
0
int main(int argc, char **argv) {
  int ret = 0;
  httrackp *opt;

#ifdef _WIN32
  {
    WORD wVersionRequested;     // requested version WinSock API
    WSADATA wsadata;            // Windows Sockets API data
    int stat;

    wVersionRequested = 0x0101;
    stat = WSAStartup(wVersionRequested, &wsadata);
    if (stat != 0) {
      printf("Winsock not found!\n");
      return;
    } else if (LOBYTE(wsadata.wVersion) != 1 && HIBYTE(wsadata.wVersion) != 1) {
      printf("WINSOCK.DLL does not support version 1.1\n");
      WSACleanup();
      return;
    }
  }
#endif

  signal_handlers();
  hts_init();

  // Check version compatibility
  if (hts_sizeof_opt() != sizeof(httrackp)) {
    fprintf(stderr,
      "incompatible current httrack library version %s, expected version %s",
      hts_version(), HTTRACK_VERSIONID);
    abortLog("incompatible httrack library version, please update both httrack and its library");
  }

  opt = global_opt = hts_create_opt();
  assert(opt->size_httrackp == sizeof(httrackp));

  CHAIN_FUNCTION(opt, init, htsshow_init, NULL);
  CHAIN_FUNCTION(opt, uninit, htsshow_uninit, NULL);
  CHAIN_FUNCTION(opt, start, htsshow_start, NULL);
  CHAIN_FUNCTION(opt, end, htsshow_end, NULL);
  CHAIN_FUNCTION(opt, chopt, htsshow_chopt, NULL);
  CHAIN_FUNCTION(opt, preprocess, htsshow_preprocesshtml, NULL);
  CHAIN_FUNCTION(opt, postprocess, htsshow_postprocesshtml, NULL);
  CHAIN_FUNCTION(opt, check_html, htsshow_checkhtml, NULL);
  CHAIN_FUNCTION(opt, query, htsshow_query, NULL);
  CHAIN_FUNCTION(opt, query2, htsshow_query2, NULL);
  CHAIN_FUNCTION(opt, query3, htsshow_query3, NULL);
  CHAIN_FUNCTION(opt, loop, htsshow_loop, NULL);
  CHAIN_FUNCTION(opt, check_link, htsshow_check, NULL);
  CHAIN_FUNCTION(opt, check_mime, htsshow_check_mime, NULL);
  CHAIN_FUNCTION(opt, pause, htsshow_pause, NULL);
  CHAIN_FUNCTION(opt, filesave, htsshow_filesave, NULL);
  CHAIN_FUNCTION(opt, filesave2, htsshow_filesave2, NULL);
  CHAIN_FUNCTION(opt, linkdetected, htsshow_linkdetected, NULL);
  CHAIN_FUNCTION(opt, linkdetected2, htsshow_linkdetected2, NULL);
  CHAIN_FUNCTION(opt, xfrstatus, htsshow_xfrstatus, NULL);
  CHAIN_FUNCTION(opt, savename, htsshow_savename, NULL);
  CHAIN_FUNCTION(opt, sendhead, htsshow_sendheader, NULL);
  CHAIN_FUNCTION(opt, receivehead, htsshow_receiveheader, NULL);

  ret = hts_main2(argc, argv, opt);
  if (ret) {
    fprintf(stderr, "* %s\n", hts_errmsg(opt));
  }
  global_opt = NULL;
  hts_free_opt(opt);
  htsthread_wait();             /* wait for pending threads */
  hts_uninit();

#ifdef _WIN32
  WSACleanup();
#endif

  return ret;
}
Exemplo n.º 15
0
int samtools_main(int argc, char *argv[])
{
#ifdef _WIN32
    setmode(fileno(stdout), O_BINARY);
    setmode(fileno(stdin),  O_BINARY);
#endif

    if (argc < 2) { usage(pysamerr); return 1; }
    
    if (strcmp(argv[1], "help") == 0 || strcmp(argv[1], "--help") == 0) {
        if (argc == 2) { usage(stdout); return 0; }

        // Otherwise change "samtools help COMMAND [...]" to "samtools COMMAND";
        // main_xyz() functions by convention display the subcommand's usage
        // when invoked without any arguments.
        argv++;
        argc = 2;
    }
    int ret = 0;
    if (strcmp(argv[1], "view") == 0)           ret = main_samview(argc-1, argv+1);
    else if (strcmp(argv[1], "import") == 0)    ret = main_import(argc-1, argv+1);
    else if (strcmp(argv[1], "mpileup") == 0)   ret = bam_mpileup(argc-1, argv+1);
    else if (strcmp(argv[1], "merge") == 0)     ret = bam_merge(argc-1, argv+1);
    else if (strcmp(argv[1], "sort") == 0)      ret = bam_sort(argc-1, argv+1);
    else if (strcmp(argv[1], "index") == 0)     ret = bam_index(argc-1, argv+1);
    else if (strcmp(argv[1], "idxstats") == 0)  ret = bam_idxstats(argc-1, argv+1);
    else if (strcmp(argv[1], "faidx") == 0)     ret = faidx_main(argc-1, argv+1);
    else if (strcmp(argv[1], "dict") == 0)      ret = dict_main(argc-1, argv+1);
    else if (strcmp(argv[1], "fixmate") == 0)   ret = bam_mating(argc-1, argv+1);
    else if (strcmp(argv[1], "rmdup") == 0)     ret = bam_rmdup(argc-1, argv+1);
    else if (strcmp(argv[1], "flagstat") == 0)  ret = bam_flagstat(argc-1, argv+1);
    else if (strcmp(argv[1], "calmd") == 0)     ret = bam_fillmd(argc-1, argv+1);
    else if (strcmp(argv[1], "fillmd") == 0)    ret = bam_fillmd(argc-1, argv+1);
    else if (strcmp(argv[1], "reheader") == 0)  ret = main_reheader(argc-1, argv+1);
    else if (strcmp(argv[1], "cat") == 0)       ret = main_cat(argc-1, argv+1);
    else if (strcmp(argv[1], "targetcut") == 0) ret = main_cut_target(argc-1, argv+1);
    else if (strcmp(argv[1], "phase") == 0)     ret = main_phase(argc-1, argv+1);
    else if (strcmp(argv[1], "depth") == 0)     ret = main_depth(argc-1, argv+1);
    else if (strcmp(argv[1], "bam2fq") == 0 ||
             strcmp(argv[1], "fastq") == 0 ||
             strcmp(argv[1], "fasta") == 0)     ret = main_bam2fq(argc-1, argv+1);
    else if (strcmp(argv[1], "pad2unpad") == 0) ret = main_pad2unpad(argc-1, argv+1);
    else if (strcmp(argv[1], "depad") == 0)     ret = main_pad2unpad(argc-1, argv+1);
    else if (strcmp(argv[1], "bedcov") == 0)    ret = main_bedcov(argc-1, argv+1);
    else if (strcmp(argv[1], "bamshuf") == 0)   ret = main_bamshuf(argc-1, argv+1);
    else if (strcmp(argv[1], "collate") == 0)   ret = main_bamshuf(argc-1, argv+1);
    else if (strcmp(argv[1], "stats") == 0)     ret = main_stats(argc-1, argv+1);
    else if (strcmp(argv[1], "flags") == 0)     ret = main_flags(argc-1, argv+1);
    else if (strcmp(argv[1], "split") == 0)     ret = main_split(argc-1, argv+1);
    else if (strcmp(argv[1], "quickcheck") == 0)  ret = main_quickcheck(argc-1, argv+1);
    else if (strcmp(argv[1], "addreplacerg") == 0) ret = main_addreplacerg(argc-1, argv+1);
    else if (strcmp(argv[1], "pileup") == 0) {
        fprintf(pysamerr, "[main] The `pileup' command has been removed. Please use `mpileup' instead.\n");
        return 1;
    }
    else if (strcmp(argv[1], "tview") == 0)   ret = bam_tview_main(argc-1, argv+1);
    else if (strcmp(argv[1], "--version") == 0) {
        printf(
"samtools %s\n"
"Using htslib %s\n"
"Copyright (C) 2015 Genome Research Ltd.\n",
               samtools_version(), hts_version());
    }
    else if (strcmp(argv[1], "--version-only") == 0) {
        printf("%s+htslib-%s\n", samtools_version(), hts_version());
    }
    else {
        fprintf(pysamerr, "[main] unrecognized command '%s'\n", argv[1]);
        return 1;
    }
    return ret;
}
Exemplo n.º 16
0
Arquivo: tabix.c Projeto: Illumina/akt
int main(int argc, char *argv[])
{
    int c, detect = 1, min_shift = 0, is_force = 0, list_chroms = 0, do_csi = 0;
    tbx_conf_t conf = tbx_conf_gff;
    char *reheader = NULL;
    args_t args;
    memset(&args,0,sizeof(args_t));

    static const struct option loptions[] =
    {
        {"help", no_argument, NULL, 2},
        {"regions", required_argument, NULL, 'R'},
        {"targets", required_argument, NULL, 'T'},
        {"csi", no_argument, NULL, 'C'},
        {"zero-based", no_argument, NULL, '0'},
        {"print-header", no_argument, NULL, 'h'},
        {"only-header", no_argument, NULL, 'H'},
        {"begin", required_argument, NULL, 'b'},
        {"comment", required_argument, NULL, 'c'},
        {"end", required_argument, NULL, 'e'},
        {"force", no_argument, NULL, 'f'},
        {"preset", required_argument, NULL, 'p'},
        {"sequence", required_argument, NULL, 's'},
        {"skip-lines", required_argument, NULL, 'S'},
        {"list-chroms", no_argument, NULL, 'l'},
        {"reheader", required_argument, NULL, 'r'},
        {"version", no_argument, NULL, 1},
        {NULL, 0, NULL, 0}
    };

    char *tmp;
    while ((c = getopt_long(argc, argv, "hH?0b:c:e:fm:p:s:S:lr:CR:T:", loptions,NULL)) >= 0)
    {
        switch (c)
        {
            case 'R': args.regions_fname = optarg; break;
            case 'T': args.targets_fname = optarg; break;
            case 'C': do_csi = 1; break;
            case 'r': reheader = optarg; break;
            case 'h': args.print_header = 1; break;
            case 'H': args.print_header = 1; args.header_only = 1; break;
            case 'l': list_chroms = 1; break;
            case '0': conf.preset |= TBX_UCSC; detect = 0; break;
            case 'b':
                conf.bc = strtol(optarg,&tmp,10);
                if ( *tmp ) error("Could not parse argument: -b %s\n", optarg);
                detect = 0;
                break;
            case 'e':
                conf.ec = strtol(optarg,&tmp,10);
                if ( *tmp ) error("Could not parse argument: -e %s\n", optarg);
                detect = 0;
                break;
            case 'c': conf.meta_char = *optarg; detect = 0; break;
            case 'f': is_force = 1; break;
            case 'm':
                min_shift = strtol(optarg,&tmp,10);
                if ( *tmp ) error("Could not parse argument: -m %s\n", optarg);
                break;
            case 'p':
                detect = 0;
                if (strcmp(optarg, "gff") == 0) conf = tbx_conf_gff;
                else if (strcmp(optarg, "bed") == 0) conf = tbx_conf_bed;
                else if (strcmp(optarg, "sam") == 0) conf = tbx_conf_sam;
                else if (strcmp(optarg, "vcf") == 0) conf = tbx_conf_vcf;
                else if (strcmp(optarg, "bcf") == 0) detect = 1; // bcf is autodetected, preset is not needed
                else if (strcmp(optarg, "bam") == 0) detect = 1; // same as bcf
                else error("The preset string not recognised: '%s'\n", optarg);
                break;
            case 's':
                conf.sc = strtol(optarg,&tmp,10);
                if ( *tmp ) error("Could not parse argument: -s %s\n", optarg);
                detect = 0;
                break;
            case 'S':
                conf.line_skip = strtol(optarg,&tmp,10);
                if ( *tmp ) error("Could not parse argument: -S %s\n", optarg);
                detect = 0;
                break;
            case 1:
                printf(
"tabix (htslib) %s\n"
"Copyright (C) 2017 Genome Research Ltd.\n", hts_version());
                return EXIT_SUCCESS;
            default: return usage();
        }
    }

    if ( optind==argc ) return usage();

    if ( list_chroms )
        return query_chroms(argv[optind]);

    if ( argc > optind+1 || args.header_only || args.regions_fname || args.targets_fname )
    {
        int nregs = 0;
        char **regs = NULL;
        if ( !args.header_only )
            regs = parse_regions(args.regions_fname, argv+optind+1, argc-optind-1, &nregs);
        return query_regions(&args, argv[optind], regs, nregs);
    }

    char *fname = argv[optind];
    int ftype = file_type(fname);
    if ( detect )  // no preset given
    {
        if ( ftype==IS_GFF ) conf = tbx_conf_gff;
        else if ( ftype==IS_BED ) conf = tbx_conf_bed;
        else if ( ftype==IS_SAM ) conf = tbx_conf_sam;
        else if ( ftype==IS_VCF )
        {
            conf = tbx_conf_vcf;
            if ( !min_shift && do_csi ) min_shift = 14;
        }
        else if ( ftype==IS_BCF )
        {
            if ( !min_shift ) min_shift = 14;
        }
        else if ( ftype==IS_BAM )
        {
            if ( !min_shift ) min_shift = 14;
        }
    }
    if ( do_csi )
    {
        if ( !min_shift ) min_shift = 14;
        min_shift *= do_csi;  // positive for CSIv2, negative for CSIv1
    }
    if ( min_shift!=0 && !do_csi ) do_csi = 1;

    if ( reheader )
        return reheader_file(fname, reheader, ftype, &conf);

    char *suffix = ".tbi";
    if ( do_csi ) suffix = ".csi";
    else if ( ftype==IS_BAM ) suffix = ".bai";
    else if ( ftype==IS_CRAM ) suffix = ".crai";

    char *idx_fname = calloc(strlen(fname) + 5, 1);
    strcat(strcpy(idx_fname, fname), suffix);

    struct stat stat_tbi, stat_file;
    if ( !is_force && stat(idx_fname, &stat_tbi)==0 )
    {
        // Before complaining about existing index, check if the VCF file isn't
        // newer. This is a common source of errors, people tend not to notice
        // that tabix failed
        stat(fname, &stat_file);
        if ( stat_file.st_mtime <= stat_tbi.st_mtime )
            error("[tabix] the index file exists. Please use '-f' to overwrite.\n");
    }
    free(idx_fname);

    if ( ftype==IS_CRAM )
    {
        if ( bam_index_build(fname, min_shift)!=0 ) error("bam_index_build failed: %s\n", fname);
        return 0;
    }
    else if ( do_csi )
    {
        if ( ftype==IS_BCF )
        {
            if ( bcf_index_build(fname, min_shift)!=0 ) error("bcf_index_build failed: %s\n", fname);
            return 0;
        }
        if ( ftype==IS_BAM )
        {
            if ( bam_index_build(fname, min_shift)!=0 ) error("bam_index_build failed: %s\n", fname);
            return 0;
        }
        if ( tbx_index_build(fname, min_shift, &conf)!=0 ) error("tbx_index_build failed: %s\n", fname);
        return 0;
    }
    else    // TBI index
    {
        if ( tbx_index_build(fname, min_shift, &conf) ) error("tbx_index_build failed: %s\n", fname);
        return 0;
    }
    return 0;
}
Exemplo n.º 17
0
Arquivo: bgzip.c Projeto: Illumina/akt
int main(int argc, char **argv)
{
    int c, compress, pstdout, is_forced, index = 0, rebgzip = 0, reindex = 0;
    BGZF *fp;
    void *buffer;
    long start, end, size;
    char *index_fname = NULL;
    int threads = 1;

    static const struct option loptions[] =
    {
        {"help", no_argument, NULL, 'h'},
        {"offset", required_argument, NULL, 'b'},
        {"stdout", no_argument, NULL, 'c'},
        {"decompress", no_argument, NULL, 'd'},
        {"force", no_argument, NULL, 'f'},
        {"index", no_argument, NULL, 'i'},
        {"index-name", required_argument, NULL, 'I'},
        {"reindex", no_argument, NULL, 'r'},
        {"rebgzip",no_argument,NULL,'g'},
        {"size", required_argument, NULL, 's'},
        {"threads", required_argument, NULL, '@'},
        {"version", no_argument, NULL, 1},
        {NULL, 0, NULL, 0}
    };

    compress = 1; pstdout = 0; start = 0; size = -1; end = -1; is_forced = 0;
    while((c  = getopt_long(argc, argv, "cdh?fb:@:s:iI:gr",loptions,NULL)) >= 0){
        switch(c){
        case 'd': compress = 0; break;
        case 'c': pstdout = 1; break;
        case 'b': start = atol(optarg); compress = 0; pstdout = 1; break;
        case 's': size = atol(optarg); pstdout = 1; break;
        case 'f': is_forced = 1; break;
        case 'i': index = 1; break;
        case 'I': index_fname = optarg; break;
        case 'g': rebgzip = 1; break;
        case 'r': reindex = 1; compress = 0; break;
        case '@': threads = atoi(optarg); break;
        case 1:
            printf(
"bgzip (htslib) %s\n"
"Copyright (C) 2017 Genome Research Ltd.\n", hts_version());
            return EXIT_SUCCESS;
        case 'h':
        case '?': return bgzip_main_usage();
        }
    }
    if (size >= 0) end = start + size;
    if (end >= 0 && end < start) {
        fprintf(stderr, "[bgzip] Illegal region: [%ld, %ld]\n", start, end);
        return 1;
    }
    if (compress == 1) {
        struct stat sbuf;
        int f_src = fileno(stdin);

        if ( argc>optind )
        {
            if ( stat(argv[optind],&sbuf)<0 )
            {
                fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]);
                return 1;
            }

            if ((f_src = open(argv[optind], O_RDONLY)) < 0) {
                fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]);
                return 1;
            }

            if (pstdout)
                fp = bgzf_open("-", "w");
            else
            {
                char *name = malloc(strlen(argv[optind]) + 5);
                strcpy(name, argv[optind]);
                strcat(name, ".gz");
                fp = bgzf_open(name, is_forced? "w" : "wx");
                if (fp == NULL && errno == EEXIST && confirm_overwrite(name))
                    fp = bgzf_open(name, "w");
                if (fp == NULL) {
                    fprintf(stderr, "[bgzip] can't create %s: %s\n", name, strerror(errno));
                    free(name);
                    return 1;
                }
                free(name);
            }
        }
        else if (!pstdout && isatty(fileno((FILE *)stdout)) )
            return bgzip_main_usage();
        else if ( index && !index_fname )
        {
            fprintf(stderr, "[bgzip] Index file name expected when writing to stdout\n");
            return 1;
        }
        else
            fp = bgzf_open("-", "w");

        if ( index && rebgzip )
        {
            fprintf(stderr, "[bgzip] Can't produce a index and rebgzip simultaneously\n");
            return 1;
        }

        if ( rebgzip && !index_fname )
        {
            fprintf(stderr, "[bgzip] Index file name expected when writing to stdout\n");
            return 1;
        }

        if (threads > 1)
            bgzf_mt(fp, threads, 256);

        if ( index ) bgzf_index_build_init(fp);
        buffer = malloc(WINDOW_SIZE);
#ifdef _WIN32
        _setmode(f_src, O_BINARY);
#endif
        if (rebgzip){
            if ( bgzf_index_load(fp, index_fname, NULL) < 0 ) error("Could not load index: %s.gzi\n", argv[optind]);

            while ((c = read(f_src, buffer, WINDOW_SIZE)) > 0)
                if (bgzf_block_write(fp, buffer, c) < 0) error("Could not write %d bytes: Error %d\n", c, fp->errcode);
        }
        else {
            while ((c = read(f_src, buffer, WINDOW_SIZE)) > 0)
                if (bgzf_write(fp, buffer, c) < 0) error("Could not write %d bytes: Error %d\n", c, fp->errcode);
        }
        if ( index )
        {
            if (index_fname) {
                if (bgzf_index_dump(fp, index_fname, NULL) < 0)
                    error("Could not write index to '%s'\n", index_fname);
            } else {
                if (bgzf_index_dump(fp, argv[optind], ".gz.gzi") < 0)
                    error("Could not write index to '%s.gz.gzi'", argv[optind]);
            }
        }
        if (bgzf_close(fp) < 0) error("Close failed: Error %d", fp->errcode);
        if (argc > optind && !pstdout) unlink(argv[optind]);
        free(buffer);
        close(f_src);
        return 0;
    }
    else if ( reindex )
    {
        if ( argc>optind )
        {
            fp = bgzf_open(argv[optind], "r");
            if ( !fp ) error("[bgzip] Could not open file: %s\n", argv[optind]);
        }
        else
        {
            if ( !index_fname ) error("[bgzip] Index file name expected when reading from stdin\n");
            fp = bgzf_open("-", "r");
            if ( !fp ) error("[bgzip] Could not read from stdin: %s\n", strerror(errno));
        }

        buffer = malloc(BGZF_BLOCK_SIZE);
        bgzf_index_build_init(fp);
        int ret;
        while ( (ret=bgzf_read(fp, buffer, BGZF_BLOCK_SIZE))>0 ) ;
        free(buffer);
        if ( ret<0 ) error("Is the file gzipped or bgzipped? The latter is required for indexing.\n");

        if ( index_fname ) {
            if (bgzf_index_dump(fp, index_fname, NULL) < 0)
                error("Could not write index to '%s'\n", index_fname);
        } else {
            if (bgzf_index_dump(fp, argv[optind], ".gzi") < 0)
                error("Could not write index to '%s.gzi'\n", argv[optind]);
        }

        if ( bgzf_close(fp)<0 ) error("Close failed: Error %d\n",fp->errcode);
        return 0;
    }
    else
    {
        struct stat sbuf;
        int f_dst;

        if ( argc>optind )
        {
            if ( stat(argv[optind],&sbuf)<0 )
            {
                fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]);
                return 1;
            }
            char *name;
            int len = strlen(argv[optind]);
            if ( strcmp(argv[optind]+len-3,".gz") )
            {
                fprintf(stderr, "[bgzip] %s: unknown suffix -- ignored\n", argv[optind]);
                return 1;
            }
            fp = bgzf_open(argv[optind], "r");
            if (fp == NULL) {
                fprintf(stderr, "[bgzip] Could not open file: %s\n", argv[optind]);
                return 1;
            }

            if (pstdout) {
                f_dst = fileno(stdout);
            }
            else {
                const int wrflags = O_WRONLY | O_CREAT | O_TRUNC;
                name = strdup(argv[optind]);
                name[strlen(name) - 3] = '\0';
                f_dst = open(name, is_forced? wrflags : wrflags|O_EXCL, 0666);
                if (f_dst < 0 && errno == EEXIST && confirm_overwrite(name))
                    f_dst = open(name, wrflags, 0666);
                if (f_dst < 0) {
                    fprintf(stderr, "[bgzip] can't create %s: %s\n", name, strerror(errno));
                    free(name);
                    return 1;
                }
                free(name);
            }
        }
        else if (!pstdout && isatty(fileno((FILE *)stdin)) )
            return bgzip_main_usage();
        else
        {
            f_dst = fileno(stdout);
            fp = bgzf_open("-", "r");
            if (fp == NULL) {
                fprintf(stderr, "[bgzip] Could not read from stdin: %s\n", strerror(errno));
                return 1;
            }
        }
        if (threads > 1)
            bgzf_mt(fp, threads, 256);

        buffer = malloc(WINDOW_SIZE);
        if ( start>0 )
        {
            if ( bgzf_index_load(fp, argv[optind], ".gzi") < 0 ) error("Could not load index: %s.gzi\n", argv[optind]);
            if ( bgzf_useek(fp, start, SEEK_SET) < 0 ) error("Could not seek to %d-th (uncompressd) byte\n", start);
        }
#ifdef _WIN32
        _setmode(f_dst, O_BINARY);
#endif
        while (1) {
            if (end < 0) c = bgzf_read(fp, buffer, WINDOW_SIZE);
            else c = bgzf_read(fp, buffer, (end - start > WINDOW_SIZE)? WINDOW_SIZE:(end - start));
            if (c == 0) break;
            if (c < 0) error("Could not read %d bytes: Error %d\n", (end - start > WINDOW_SIZE)? WINDOW_SIZE:(end - start), fp->errcode);
            start += c;
            if ( write(f_dst, buffer, c) != c ) {
#ifdef _WIN32
                if (GetLastError() != ERROR_NO_DATA)
#endif
                error("Could not write %d bytes\n", c);
            }
            if (end >= 0 && start >= end) break;
        }
        free(buffer);
        if (bgzf_close(fp) < 0) error("Close failed: Error %d\n",fp->errcode);
        if (!pstdout) unlink(argv[optind]);
        return 0;
    }
}
Exemplo n.º 18
0
multiReader::multiReader(int argc,char**argv){
  gz=Z_NULL;
  myglf=NULL;myvcf=NULL;mpil=NULL;bglObj=NULL;
  
  nLines=50;
  
  fname=NULL;
  intName=1;
  minQ = MINQ;

  nInd =0;
  isSim =0;
  args=NULL;
  args = setArgStruct(argc,argv);
  fprintf(args->argumentFile,"\t-> Command: \n");
  for(int i=0;i<argc;i++)
    fprintf(args->argumentFile,"%s ",argv[i]);
  //  fprintf(args->argumentFile,"\n\n");
  if(args->argumentFile!=stderr)
    fprintf(args->argumentFile,"\n\t-> angsd version: %s (htslib: %s) build(%s %s)\n",ANGSD_VERSION,hts_version(),__DATE__,__TIME__); 
  void printTime(FILE *fp);
  printTime(args->argumentFile); 


  //type = args->inputtype;

  if(args->argc==2) {
    if((!strcasecmp(args->argv[1],"-beagle")) ||
       (!strcasecmp(args->argv[1],"-glf")) ||
       (!strcasecmp(args->argv[1],"-glf3")) ||
       (!strcasecmp(args->argv[1],"-pileup")) ||
       (!strcasecmp(args->argv[1],"-vcf-GL")) ||
       (!strcasecmp(args->argv[1],"-vcf-pl")) ||
       (!strcasecmp(args->argv[1],"-glf10_text")) ||
       (!strcasecmp(args->argv[1],"-vcf-GP"))) {
      printArg(stdout,args);
      exit(0);
    }else if ((!strcasecmp(args->argv[1],"-bam")) ||
	      (!strcasecmp(args->argv[1],"-b"))){
      setArgsBam(args);
      exit(0);
    }else
      return;
  }
  
  getOptions(args);
  if(args->fai==NULL){
    int printAndExit =0;
    switch(args->inputtype)
      {
      case INPUT_GLF:
	printAndExit=1;
	break;
      case INPUT_GLF10_TEXT:
	printAndExit=1;
	break;
      case INPUT_GLF3:
	printAndExit=1;
	break;
      case INPUT_BEAGLE:
	printAndExit=1;
	break;
      case INPUT_PILEUP:
	printAndExit=1;
	break;
      }
    if(printAndExit){
      fprintf(stderr,"\t-> Must supply -fai file\n");
      exit(0);
    }
  }
  
  if(args->fai){
    if(!(args->hd=getHeadFromFai(args->fai)))
      exit(0);
  }else{
    if(args->nams.size()==0){
      fprintf(stderr,"\t-> Must choose inputfile -bam/-glf/-glf3/-pileup/-i/-vcf-gl/-vcf-gp/-vcf-pl/-glf10_text filename\n");
      exit(0);
    }
    if(args->inputtype==INPUT_BAM){
      htsFile *in=sam_open(args->nams[0],"r");
      assert(in);
      args->hd= sam_hdr_read(in);
      hts_close(in);
    }
  }
 
  if(!(INPUT_VCF_GL||INPUT_VCF_GP)){
    if(args->hd==NULL){
      fprintf(stderr,"For non-bams you should include -fai arguments\n");
      exit(0);
    }
  }

  if((args->inputtype==INPUT_PILEUP||args->inputtype==INPUT_GLF||args->inputtype==INPUT_GLF3||args->inputtype==INPUT_GLF10_TEXT)){
    if(nInd==0){
      fprintf(stderr,"\t-> Must supply -nInd when using -glf/-glf3/-pileup/-glf10_text files\n");
      exit(0);
    }
  }else
    args->nInd = args->nams.size();

  if(args->inputtype==INPUT_VCF_GP||args->inputtype==INPUT_VCF_GL){
    if(args->regions.size()>1){
      fprintf(stderr,"\t-> Only one region can be specified with using bcf (i doubt more is needed)  will exit\n");
      exit(0);
    }else if(args->regions.size()<=1){
      myvcf = new vcfReader(args->infile,NULL);
      args->hd=bcf_hdr_2_bam_hdr_t(myvcf->hs);
      args->nInd = myvcf->hs->nsamples;
    }
  }
  //make args->hd
  
  revMap = buildRevTable(args->hd);
  args->revMap = revMap;
  setArgsBam(args);
  if(args->inputtype==INPUT_VCF_GL){
    if(args->regions.size()==1){
      char tmp[1024];
      int start=args->regions[0].start;
      int stop=args->regions[0].stop;
      int ref=args->regions[0].refID;
      snprintf(tmp,1024,"%s:%d-%d",args->hd->target_name[ref],start+1,stop);
      //    fprintf(stderr,"tmp:%s\n",tmp);
      //    exit(0);
      myvcf->seek(tmp);
    }
    
  }


  
  if(fname==NULL)
    return;

  gz=Z_NULL;
  gz=gzopen(fname,"r");   
  if(gz==Z_NULL){
    fprintf(stderr,"\t-> Problem opening file: \'%s\'\n",fname);
    exit(0);
  }

  switch(args->inputtype){
  case INPUT_PILEUP:{
    mpil = new mpileup(args->nInd,gz,args->revMap,minQ);
    break;
  }
  case INPUT_GLF:{
    myglf = new glfReader(args->nInd,gz,10,isSim);
    break;
  }
  case INPUT_GLF3:{
    isSim = 1; //Added by FGV on 22/02/2015: GLF3 is always simulated data until a better alternative can be found
    myglf = new glfReader(args->nInd,gz,3,isSim);
    break;
  }
  case INPUT_GLF10_TEXT:{
    myglf_text = new glfReader_text(args->nInd,gz,args->revMap);
    break;
  }

  case INPUT_BEAGLE:{
    bglObj = new beagle_reader(gz,args->revMap,intName,args->nInd);
    break;
  }
    
  default:{
    break;
  }
  }
  if(args->inputtype==INPUT_VCF_GL||args->inputtype==INPUT_VCF_GL){
    fprintf(stderr,"\t-> VCF still beta. Remember that\n");
    fprintf(stderr,"\t   1. indels are are discarded\n");
    fprintf(stderr,"\t   2. will use chrom, pos PL columns\n");
    fprintf(stderr,"\t   3. GL tags are interpreted as log10 and are scaled to ln (NOT USED)\n");
    fprintf(stderr,"\t   4. GP tags are interpreted directly as unscaled post probs (spec says phredscaled...) (NOT USED)\n");
    fprintf(stderr,"\t   5. FILTER column is currently NOT used (not sure what concensus is)\n");
    fprintf(stderr,"\t   6. -sites does NOT work with vcf input but -r does\n");
    fprintf(stderr,"\t   7. vcffilereading is still BETA, please report strange behaviour\n");
  }
}
Exemplo n.º 19
0
static int mpileup(mplp_conf_t *conf)
{
    if (conf->nfiles == 0) {
        fprintf(stderr,"[%s] no input file/data given\n", __func__);
        exit(EXIT_FAILURE);
    }

    mplp_ref_t mp_ref = MPLP_REF_INIT;
    conf->gplp = (mplp_pileup_t *) calloc(1,sizeof(mplp_pileup_t));
    conf->mplp_data = (mplp_aux_t**) calloc(conf->nfiles, sizeof(mplp_aux_t*));
    conf->plp = (const bam_pileup1_t**) calloc(conf->nfiles, sizeof(bam_pileup1_t*));
    conf->n_plp = (int*) calloc(conf->nfiles, sizeof(int));

    // Allow to run mpileup on multiple regions in one go. This comes at cost: the bai index
    // must be kept in the memory for the whole time which can be a problem with many bams.
    // Therefore if none or only one region is requested, we initialize the bam iterator as
    // before and free the index. Only when multiple regions are queried, we keep the index.
    int nregs = 0;
    if ( conf->reg_fname )
    {
        if ( conf->reg_is_file )
        {
            conf->reg = regidx_init(conf->reg_fname,NULL,NULL,0,NULL);
            if ( !conf->reg ) {
                fprintf(stderr,"Could not parse the regions: %s\n", conf->reg_fname);
                exit(EXIT_FAILURE);
            }
        }
        else
        {
            conf->reg = regidx_init(NULL,regidx_parse_reg,NULL,sizeof(char*),NULL);
            if ( regidx_insert_list(conf->reg,conf->reg_fname,',') !=0 ) {
                fprintf(stderr,"Could not parse the regions: %s\n", conf->reg_fname);
                exit(EXIT_FAILURE);
            }
        }
        nregs = regidx_nregs(conf->reg);
        conf->reg_itr = regitr_init(conf->reg);
        regitr_loop(conf->reg_itr);   // region iterator now positioned at the first region
    }

    // read the header of each file in the list and initialize data
    // beware: mpileup has always assumed that tid's are consistent in the headers, add sanity check at least!
    bam_hdr_t *hdr = NULL;      // header of first file in input list
    int i;
    for (i = 0; i < conf->nfiles; ++i) {
        bam_hdr_t *h_tmp;
        conf->mplp_data[i] = (mplp_aux_t*) calloc(1, sizeof(mplp_aux_t));
        conf->mplp_data[i]->fp = sam_open(conf->files[i], "rb");
        if ( !conf->mplp_data[i]->fp )
        {
            fprintf(stderr, "[%s] failed to open %s: %s\n", __func__, conf->files[i], strerror(errno));
            exit(EXIT_FAILURE);
        }
        if (hts_set_opt(conf->mplp_data[i]->fp, CRAM_OPT_DECODE_MD, 0)) {
            fprintf(stderr, "Failed to set CRAM_OPT_DECODE_MD value\n");
            exit(EXIT_FAILURE);
        }
        if (conf->fai_fname && hts_set_fai_filename(conf->mplp_data[i]->fp, conf->fai_fname) != 0) {
            fprintf(stderr, "[%s] failed to process %s: %s\n",
                    __func__, conf->fai_fname, strerror(errno));
            exit(EXIT_FAILURE);
        }
        conf->mplp_data[i]->conf = conf;
        conf->mplp_data[i]->ref = &mp_ref;
        h_tmp = sam_hdr_read(conf->mplp_data[i]->fp);
        if ( !h_tmp ) {
            fprintf(stderr,"[%s] fail to read the header of %s\n", __func__, conf->files[i]);
            exit(EXIT_FAILURE);
        }
        conf->mplp_data[i]->h = i ? hdr : h_tmp; // for j==0, "h" has not been set yet
        conf->mplp_data[i]->bam_id = bam_smpl_add_bam(conf->bsmpl,h_tmp->text,conf->files[i]);
        if ( conf->mplp_data[i]->bam_id<0 )
        {
            // no usable readgroups in this bam, it can be skipped
            sam_close(conf->mplp_data[i]->fp);
            free(conf->mplp_data[i]);
            bam_hdr_destroy(h_tmp);
            free(conf->files[i]);
            if ( i+1<conf->nfiles ) memmove(&conf->files[i],&conf->files[i+1],sizeof(*conf->files)*(conf->nfiles-i-1));
            conf->nfiles--;
            i--;
            continue;
        }
        if (conf->reg) {
            hts_idx_t *idx = sam_index_load(conf->mplp_data[i]->fp, conf->files[i]);
            if (idx == NULL) {
                fprintf(stderr, "[%s] fail to load index for %s\n", __func__, conf->files[i]);
                exit(EXIT_FAILURE);
            }
            conf->buf.l = 0;
            ksprintf(&conf->buf,"%s:%u-%u",conf->reg_itr->seq,conf->reg_itr->beg+1,conf->reg_itr->end+1);
            conf->mplp_data[i]->iter = sam_itr_querys(idx, conf->mplp_data[i]->h, conf->buf.s);
            if ( !conf->mplp_data[i]->iter ) 
            {
                conf->mplp_data[i]->iter = sam_itr_querys(idx, conf->mplp_data[i]->h, conf->reg_itr->seq);
                if ( conf->mplp_data[i]->iter ) {
                    fprintf(stderr,"[E::%s] fail to parse region '%s'\n", __func__, conf->buf.s);
                    exit(EXIT_FAILURE);
                }
                fprintf(stderr,"[E::%s] the sequence \"%s\" not found: %s\n",__func__,conf->reg_itr->seq,conf->files[i]);
                exit(EXIT_FAILURE);
            }
            if ( nregs==1 ) // no need to keep the index in memory
               hts_idx_destroy(idx);
            else
                conf->mplp_data[i]->idx = idx;
        }

        if ( !hdr ) hdr = h_tmp; /* save the header of first file in list */
        else {
            // FIXME: check consistency between h and h_tmp
            bam_hdr_destroy(h_tmp);

            // we store only the first file's header; it's (alleged to be)
            // compatible with the i-th file's target_name lookup needs
            conf->mplp_data[i]->h = hdr;
        }
    }
    // allocate data storage proportionate to number of samples being studied sm->n
    bam_smpl_get_samples(conf->bsmpl, &conf->gplp->n);
    conf->gplp->n_plp = (int*) calloc(conf->gplp->n, sizeof(int));
    conf->gplp->m_plp = (int*) calloc(conf->gplp->n, sizeof(int));
    conf->gplp->plp = (bam_pileup1_t**) calloc(conf->gplp->n, sizeof(bam_pileup1_t*));  

    fprintf(stderr, "[%s] %d samples in %d input files\n", __func__, conf->gplp->n, conf->nfiles);
    // write the VCF header
    conf->bcf_fp = hts_open(conf->output_fname?conf->output_fname:"-", hts_bcf_wmode(conf->output_type));
    if (conf->bcf_fp == NULL) {
        fprintf(stderr, "[%s] failed to write to %s: %s\n", __func__, conf->output_fname? conf->output_fname : "standard output", strerror(errno));
        exit(EXIT_FAILURE);
    }
    if ( conf->n_threads ) hts_set_threads(conf->bcf_fp, conf->n_threads);

    // BCF header creation
    conf->bcf_hdr = bcf_hdr_init("w");
    conf->buf.l = 0;

    if (conf->record_cmd_line)
    {
        ksprintf(&conf->buf, "##bcftoolsVersion=%s+htslib-%s\n",bcftools_version(),hts_version());
        bcf_hdr_append(conf->bcf_hdr, conf->buf.s);

        conf->buf.l = 0;
        ksprintf(&conf->buf, "##bcftoolsCommand=mpileup");
        for (i=1; i<conf->argc; i++) ksprintf(&conf->buf, " %s", conf->argv[i]);
        kputc('\n', &conf->buf);
        bcf_hdr_append(conf->bcf_hdr, conf->buf.s);
    }

    if (conf->fai_fname)
    {
        conf->buf.l = 0;
        ksprintf(&conf->buf, "##reference=file://%s\n", conf->fai_fname);
        bcf_hdr_append(conf->bcf_hdr, conf->buf.s);
    }

    // Translate BAM @SQ tags to BCF ##contig tags
    // todo: use/write new BAM header manipulation routines, fill also UR, M5
    for (i=0; i<hdr->n_targets; i++)
    {
        conf->buf.l = 0;
        ksprintf(&conf->buf, "##contig=<ID=%s,length=%d>", hdr->target_name[i], hdr->target_len[i]);
        bcf_hdr_append(conf->bcf_hdr, conf->buf.s);
    }
    conf->buf.l = 0;

    bcf_hdr_append(conf->bcf_hdr,"##ALT=<ID=*,Description=\"Represents allele(s) other than observed.\">");
    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=INDEL,Number=0,Type=Flag,Description=\"Indicates that the variant is an INDEL.\">");
    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=IDV,Number=1,Type=Integer,Description=\"Maximum number of reads supporting an indel\">");
    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=IMF,Number=1,Type=Float,Description=\"Maximum fraction of reads supporting an indel\">");
    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Raw read depth\">");
    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=VDB,Number=1,Type=Float,Description=\"Variant Distance Bias for filtering splice-site artefacts in RNA-seq data (bigger is better)\",Version=\"3\">");
    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=RPB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Read Position Bias (bigger is better)\">");
    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=MQB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality Bias (bigger is better)\">");
    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=BQB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Base Quality Bias (bigger is better)\">");
    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=MQSB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality vs Strand Bias (bigger is better)\">");
#if CDF_MWU_TESTS
    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=RPB2,Number=1,Type=Float,Description=\"Mann-Whitney U test of Read Position Bias [CDF] (bigger is better)\">");
    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=MQB2,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality Bias [CDF] (bigger is better)\">");
    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=BQB2,Number=1,Type=Float,Description=\"Mann-Whitney U test of Base Quality Bias [CDF] (bigger is better)\">");
    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=MQSB2,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality vs Strand Bias [CDF] (bigger is better)\">");
#endif
    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=SGB,Number=1,Type=Float,Description=\"Segregation based metric.\">");
    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=MQ0F,Number=1,Type=Float,Description=\"Fraction of MQ0 reads (smaller is better)\">");
    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=I16,Number=16,Type=Float,Description=\"Auxiliary tag used for calling, see description of bcf_callret1_t in bam2bcf.h\">");
    bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=QS,Number=R,Type=Float,Description=\"Auxiliary tag used for calling\">");
    bcf_hdr_append(conf->bcf_hdr,"##FORMAT=<ID=PL,Number=G,Type=Integer,Description=\"List of Phred-scaled genotype likelihoods\">");
    if ( conf->fmt_flag&B2B_FMT_DP )
        bcf_hdr_append(conf->bcf_hdr,"##FORMAT=<ID=DP,Number=1,Type=Integer,Description=\"Number of high-quality bases\">");
    if ( conf->fmt_flag&B2B_FMT_DV )
        bcf_hdr_append(conf->bcf_hdr,"##FORMAT=<ID=DV,Number=1,Type=Integer,Description=\"Number of high-quality non-reference bases\">");
    if ( conf->fmt_flag&B2B_FMT_DPR )
        bcf_hdr_append(conf->bcf_hdr,"##FORMAT=<ID=DPR,Number=R,Type=Integer,Description=\"Number of high-quality bases observed for each allele\">");
    if ( conf->fmt_flag&B2B_INFO_DPR )
        bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=DPR,Number=R,Type=Integer,Description=\"Number of high-quality bases observed for each allele\">");
    if ( conf->fmt_flag&B2B_FMT_DP4 )
        bcf_hdr_append(conf->bcf_hdr,"##FORMAT=<ID=DP4,Number=4,Type=Integer,Description=\"Number of high-quality ref-fwd, ref-reverse, alt-fwd and alt-reverse bases\">");
    if ( conf->fmt_flag&B2B_FMT_SP )
        bcf_hdr_append(conf->bcf_hdr,"##FORMAT=<ID=SP,Number=1,Type=Integer,Description=\"Phred-scaled strand bias P-value\">");
    if ( conf->fmt_flag&B2B_FMT_AD )
        bcf_hdr_append(conf->bcf_hdr,"##FORMAT=<ID=AD,Number=R,Type=Integer,Description=\"Allelic depths\">");
    if ( conf->fmt_flag&B2B_FMT_ADF )
        bcf_hdr_append(conf->bcf_hdr,"##FORMAT=<ID=ADF,Number=R,Type=Integer,Description=\"Allelic depths on the forward strand\">");
    if ( conf->fmt_flag&B2B_FMT_ADR )
        bcf_hdr_append(conf->bcf_hdr,"##FORMAT=<ID=ADR,Number=R,Type=Integer,Description=\"Allelic depths on the reverse strand\">");
    if ( conf->fmt_flag&B2B_INFO_AD )
        bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=AD,Number=R,Type=Integer,Description=\"Total allelic depths\">");
    if ( conf->fmt_flag&B2B_INFO_ADF )
        bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=ADF,Number=R,Type=Integer,Description=\"Total allelic depths on the forward strand\">");
    if ( conf->fmt_flag&B2B_INFO_ADR )
        bcf_hdr_append(conf->bcf_hdr,"##INFO=<ID=ADR,Number=R,Type=Integer,Description=\"Total allelic depths on the reverse strand\">");
    if ( conf->gvcf )
        gvcf_update_header(conf->gvcf, conf->bcf_hdr);

    int nsmpl;
    const char **smpl = bam_smpl_get_samples(conf->bsmpl, &nsmpl);
    for (i=0; i<nsmpl; i++)
        bcf_hdr_add_sample(conf->bcf_hdr, smpl[i]);
    bcf_hdr_write(conf->bcf_fp, conf->bcf_hdr);

    conf->bca = bcf_call_init(-1., conf->min_baseQ);
    conf->bcr = (bcf_callret1_t*) calloc(nsmpl, sizeof(bcf_callret1_t));
    conf->bca->openQ = conf->openQ, conf->bca->extQ = conf->extQ, conf->bca->tandemQ = conf->tandemQ;
    conf->bca->min_frac = conf->min_frac;
    conf->bca->min_support = conf->min_support;
    conf->bca->per_sample_flt = conf->flag & MPLP_PER_SAMPLE;

    conf->bc.bcf_hdr = conf->bcf_hdr;
    conf->bc.n  = nsmpl;
    conf->bc.PL = (int32_t*) malloc(15 * nsmpl * sizeof(*conf->bc.PL));
    if (conf->fmt_flag)
    {
        assert( sizeof(float)==sizeof(int32_t) );
        conf->bc.DP4 = (int32_t*) malloc(nsmpl * sizeof(int32_t) * 4);
        conf->bc.fmt_arr = (uint8_t*) malloc(nsmpl * sizeof(float)); // all fmt_flag fields, float and int32
        if ( conf->fmt_flag&(B2B_INFO_DPR|B2B_FMT_DPR|B2B_INFO_AD|B2B_INFO_ADF|B2B_INFO_ADR|B2B_FMT_AD|B2B_FMT_ADF|B2B_FMT_ADR) )
        {
            // first B2B_MAX_ALLELES fields for total numbers, the rest per-sample
            conf->bc.ADR = (int32_t*) malloc((nsmpl+1)*B2B_MAX_ALLELES*sizeof(int32_t));
            conf->bc.ADF = (int32_t*) malloc((nsmpl+1)*B2B_MAX_ALLELES*sizeof(int32_t));
            for (i=0; i<nsmpl; i++)
            {
                conf->bcr[i].ADR = conf->bc.ADR + (i+1)*B2B_MAX_ALLELES;
                conf->bcr[i].ADF = conf->bc.ADF + (i+1)*B2B_MAX_ALLELES;
            }
        }
    }

    // init mpileup
    conf->iter = bam_mplp_init(conf->nfiles, mplp_func, (void**)conf->mplp_data);
    if ( conf->flag & MPLP_SMART_OVERLAPS ) bam_mplp_init_overlaps(conf->iter);
    if ( (double)conf->max_depth * conf->nfiles > 1<<20)
        fprintf(stderr, "Warning: Potential memory hog, up to %.0fM reads in the pileup!\n", (double)conf->max_depth*conf->nfiles);
    if ( (double)conf->max_depth * conf->nfiles / nsmpl < 250 )
        fprintf(stderr, "Note: The maximum per-sample depth with -d %d is %.1fx\n", conf->max_depth,(double)conf->max_depth * conf->nfiles / nsmpl);
    bam_mplp_set_maxcnt(conf->iter, conf->max_depth);
    conf->max_indel_depth = conf->max_indel_depth * nsmpl;
    conf->bcf_rec = bcf_init1();
    bam_mplp_constructor(conf->iter, pileup_constructor);

    // Run mpileup for multiple regions
    if ( nregs )
    {
        int ireg = 0;
        do 
        {
            // first region is already positioned
            if ( ireg++ > 0 )
            {
                conf->buf.l = 0;
                ksprintf(&conf->buf,"%s:%u-%u",conf->reg_itr->seq,conf->reg_itr->beg,conf->reg_itr->end);

                for (i=0; i<conf->nfiles; i++) 
                {
                    hts_itr_destroy(conf->mplp_data[i]->iter);
                    conf->mplp_data[i]->iter = sam_itr_querys(conf->mplp_data[i]->idx, conf->mplp_data[i]->h, conf->buf.s);
                    if ( !conf->mplp_data[i]->iter ) 
                    {
                        conf->mplp_data[i]->iter = sam_itr_querys(conf->mplp_data[i]->idx, conf->mplp_data[i]->h, conf->reg_itr->seq);
                        if ( conf->mplp_data[i]->iter ) {
                            fprintf(stderr,"[E::%s] fail to parse region '%s'\n", __func__, conf->buf.s);
                            exit(EXIT_FAILURE);
                        }
                        fprintf(stderr,"[E::%s] the sequence \"%s\" not found: %s\n",__func__,conf->reg_itr->seq,conf->files[i]);
                        exit(EXIT_FAILURE);
                    }
                    bam_mplp_reset(conf->iter);
                }
            }
            mpileup_reg(conf,conf->reg_itr->beg,conf->reg_itr->end);
        }
        while ( regitr_loop(conf->reg_itr) );
    }
    else
        mpileup_reg(conf,0,0);

    flush_bcf_records(conf, conf->bcf_fp, conf->bcf_hdr, NULL);

    // clean up
    free(conf->bc.tmp.s);
    bcf_destroy1(conf->bcf_rec);
    if (conf->bcf_fp)
    {
        hts_close(conf->bcf_fp);
        bcf_hdr_destroy(conf->bcf_hdr);
        bcf_call_destroy(conf->bca);
        free(conf->bc.PL);
        free(conf->bc.DP4);
        free(conf->bc.ADR);
        free(conf->bc.ADF);
        free(conf->bc.fmt_arr);
        free(conf->bcr);
    }
    if ( conf->gvcf ) gvcf_destroy(conf->gvcf);
    free(conf->buf.s);
    for (i = 0; i < conf->gplp->n; ++i) free(conf->gplp->plp[i]);
    free(conf->gplp->plp); free(conf->gplp->n_plp); free(conf->gplp->m_plp); free(conf->gplp);
    bam_mplp_destroy(conf->iter);
    bam_hdr_destroy(hdr);
    for (i = 0; i < conf->nfiles; ++i) {
        if ( nregs>1 ) hts_idx_destroy(conf->mplp_data[i]->idx);
        sam_close(conf->mplp_data[i]->fp);
        if ( conf->mplp_data[i]->iter) hts_itr_destroy(conf->mplp_data[i]->iter);
        free(conf->mplp_data[i]);
    }
    if ( conf->reg_itr ) regitr_destroy(conf->reg_itr);
    free(conf->mplp_data); free(conf->plp); free(conf->n_plp);
    free(mp_ref.ref[0]);
    free(mp_ref.ref[1]);
    return 0;
}
Exemplo n.º 20
0
int main_plugin(int argc, char *argv[])
{
    int c;
    args_t *args  = (args_t*) calloc(1,sizeof(args_t));
    args->argc    = argc;
    args->argv = argv;
    args->output_fname = "-";
    args->output_type = FT_VCF;
    args->nplugin_paths = -1;
    int regions_is_file = 0, targets_is_file = 0, plist_only = 0, usage_only = 0, version_only = 0;

    if ( argc==1 ) usage(args);
    char *plugin_name = NULL;
    if ( argv[1][0]!='-' ) {
        plugin_name = argv[1];
        argc--;
        argv++;
    }

    static struct option loptions[] =
    {
        {"version",0,0,'V'},
        {"verbose",0,0,'v'},
        {"help",0,0,'h'},
        {"list-plugins",0,0,'l'},
        {"output",1,0,'o'},
        {"output-type",1,0,'O'},
        {"include",1,0,'i'},
        {"exclude",1,0,'e'},
        {"regions",1,0,'r'},
        {"regions-file",1,0,'R'},
        {"targets",1,0,'t'},
        {"targets-file",1,0,'T'},
        {0,0,0,0}
    };
    while ((c = getopt_long(argc, argv, "h?o:O:r:R:t:T:li:e:vV",loptions,NULL)) >= 0)
    {
        switch (c) {
        case 'V':
            version_only = 1;
            break;
        case 'v':
            args->verbose = 1;
            break;
        case 'o':
            args->output_fname = optarg;
            break;
        case 'O':
            switch (optarg[0]) {
            case 'b':
                args->output_type = FT_BCF_GZ;
                break;
            case 'u':
                args->output_type = FT_BCF;
                break;
            case 'z':
                args->output_type = FT_VCF_GZ;
                break;
            case 'v':
                args->output_type = FT_VCF;
                break;
            default:
                error("The output type \"%s\" not recognised\n", optarg);
            };
            break;
        case 'e':
            args->filter_str = optarg;
            args->filter_logic |= FLT_EXCLUDE;
            break;
        case 'i':
            args->filter_str = optarg;
            args->filter_logic |= FLT_INCLUDE;
            break;
        case 'r':
            args->regions_list = optarg;
            break;
        case 'R':
            args->regions_list = optarg;
            regions_is_file = 1;
            break;
        case 't':
            args->targets_list = optarg;
            break;
        case 'T':
            args->targets_list = optarg;
            targets_is_file = 1;
            break;
        case 'l':
            plist_only = 1;
            break;
        case '?':
        case 'h':
            usage_only = 1;
            break;
        default:
            error("Unknown argument: %s\n", optarg);
        }
    }
    if ( plist_only )  return list_plugins(args);
    if ( usage_only && ! plugin_name ) usage(args);

    load_plugin(args, plugin_name, 1, &args->plugin);
    if ( version_only )
    {
        const char *bver, *hver;
        args->plugin.version(&bver, &hver);
        printf("bcftools  %s using htslib %s\n", bcftools_version(), hts_version());
        printf("plugin at %s using htslib %s\n\n", bver, hver);
        return 0;
    }

    if ( usage_only )
    {
        if ( args->plugin.usage )
            fprintf(stderr,"%s",args->plugin.usage());
        else
            fprintf(stderr,"Usage: bcftools +%s [General Options] -- [Plugin Options]\n",plugin_name);
        return 0;
    }

    if ( args->plugin.run )
    {
        int iopt = optind;
        optind = 0;
        int ret = args->plugin.run(argc-iopt, argv+iopt);
        destroy_data(args);
        free(args);
        return ret;
    }

    char *fname = NULL;
    if ( optind>=argc || argv[optind][0]=='-' )
    {
        if ( !isatty(fileno((FILE *)stdin)) ) fname = "-";  // reading from stdin
        else usage(args);
        args->plugin.argc = argc - optind + 1;
        args->plugin.argv = argv + optind - 1;
    }
    else
    {
        fname = argv[optind];
        args->plugin.argc = argc - optind;
        args->plugin.argv = argv + optind;
    }
    optind = 0;

    args->files = bcf_sr_init();
    if ( args->regions_list )
    {
        if ( bcf_sr_set_regions(args->files, args->regions_list, regions_is_file)<0 )
            error("Failed to read the regions: %s\n", args->regions_list);
    }
    if ( args->targets_list )
    {
        if ( bcf_sr_set_targets(args->files, args->targets_list, targets_is_file, 0)<0 )
            error("Failed to read the targets: %s\n", args->targets_list);
        args->files->collapse |= COLLAPSE_SOME;
    }
    if ( !bcf_sr_add_reader(args->files, fname) ) error("Failed to open %s: %s\n", fname,bcf_sr_strerror(args->files->errnum));

    init_data(args);
    while ( bcf_sr_next_line(args->files) )
    {
        bcf1_t *line = bcf_sr_get_line(args->files,0);
        if ( args->filter )
        {
            int pass = filter_test(args->filter, line, NULL);
            if ( args->filter_logic & FLT_EXCLUDE ) pass = pass ? 0 : 1;
            if ( !pass ) continue;
        }
        line = args->plugin.process(line);
        if ( line ) bcf_write1(args->out_fh, args->hdr_out, line);
    }
    destroy_data(args);
    bcf_sr_destroy(args->files);
    free(args);
    return 0;
}
Exemplo n.º 21
0
const char *panda_sam_version(
	void) {
	return hts_version();
}
Exemplo n.º 22
0
static void init_data(args_t *args)
{
    args->prev_rid = args->skip_rid = -1;
    args->hdr = args->files->readers[0].header;

    if ( !args->sample )
    {
        if ( bcf_hdr_nsamples(args->hdr)>1 ) error("Missing the option -s, --sample\n");
        args->sample = strdup(args->hdr->samples[0]);
    }
    if ( !bcf_hdr_nsamples(args->hdr) ) error("No samples in the VCF?\n");

    // Set samples
    kstring_t str = {0,0,0};
    if ( args->estimate_AF && strcmp("-",args->estimate_AF) )
    {
        int i, n;
        char **smpls = hts_readlist(args->estimate_AF, 1, &n);

        // Make sure the query sample is included
        for (i=0; i<n; i++)
            if ( !strcmp(args->sample,smpls[i]) ) break;

        // Add the query sample if not present
        if ( i!=n ) kputs(args->sample, &str);

        for (i=0; i<n; i++)
        {
            if ( str.l ) kputc(',', &str);
            kputs(smpls[i], &str);
            free(smpls[i]);
        }
        free(smpls);
    }
    else if ( !args->estimate_AF )
        kputs(args->sample, &str);

    if ( str.l )
    {
        int ret = bcf_hdr_set_samples(args->hdr, str.s, 0);
        if ( ret<0 ) error("Error parsing the list of samples: %s\n", str.s);
        else if ( ret>0 ) error("The %d-th sample not found in the VCF\n", ret);
    }

    if ( args->af_tag )
        if ( !bcf_hdr_idinfo_exists(args->hdr,BCF_HL_INFO,bcf_hdr_id2int(args->hdr,BCF_DT_ID,args->af_tag)) )
            error("No such INFO tag in the VCF: %s\n", args->af_tag);

    args->nsmpl = bcf_hdr_nsamples(args->hdr);
    args->ismpl = bcf_hdr_id2int(args->hdr, BCF_DT_SAMPLE, args->sample);
    free(str.s);

    int i;
    for (i=0; i<256; i++) args->pl2p[i] = pow(10., -i/10.);

    // Init transition matrix and HMM
    double tprob[4];
    MAT(tprob,2,STATE_HW,STATE_HW) = 1 - args->t2AZ;
    MAT(tprob,2,STATE_HW,STATE_AZ) = args->t2HW;
    MAT(tprob,2,STATE_AZ,STATE_HW) = args->t2AZ;
    MAT(tprob,2,STATE_AZ,STATE_AZ) = 1 - args->t2HW; 

    if ( args->genmap_fname ) 
    {
        args->hmm = hmm_init(2, tprob, 0);
        hmm_set_tprob_func(args->hmm, set_tprob_genmap, args);
    }
    else if ( args->rec_rate > 0 )
    {
        args->hmm = hmm_init(2, tprob, 0);
        hmm_set_tprob_func(args->hmm, set_tprob_recrate, args);

    }
    else
        args->hmm = hmm_init(2, tprob, 10000);

    // print header
    printf("# This file was produced by: bcftools roh(%s+htslib-%s)\n", bcftools_version(),hts_version());
    printf("# The command line was:\tbcftools %s", args->argv[0]);
    for (i=1; i<args->argc; i++)
        printf(" %s",args->argv[i]);
    printf("\n#\n");
    printf("# [1]Chromosome\t[2]Position\t[3]State (0:HW, 1:AZ)\t[4]Quality\n");
}
Exemplo n.º 23
0
int main(int argc, char** argv){
  if(strcmp(ZLIB_VERSION,zlib_version)){
    fprintf(stderr,"\t-> Problem with difference in zlib version used for compiling and linking\n");
    fprintf(stderr,"\t-> ZLIB_VERSION: %s zlibversion: %s \n",ZLIB_VERSION,zlib_version);
    //return 0; 
  }
  fprintf(stderr,"\t-> angsd version: %s (htslib: %s) build(%s %s)\n",ANGSD_VERSION,hts_version(),__DATE__,__TIME__); 
  //no arguments supplied -> print info
  if(argc==1||(argc==2&&(strcasecmp(argv[1],"--version")==0||strcasecmp(argv[1],"--help")==0))){//if haven't been supplied with arguments, load default,print, and exit
    printProgInfo(stderr);
    return 0;
  }
   
   if(!strcasecmp("sites",argv[1])){
     //from prep_sites.* used for indexing -sites files
     int main_sites(int argc,char **argv);
     main_sites(--argc,++argv);
     return 0;
   }

   //print time
   clock_t t=clock();
   time_t t2=time(NULL);

   //intialize our signal handler for ctrl+c
   catchkill();

   argStruct *args=NULL;
   if(argc==2){
     fprintf(stderr,"\t-> Analysis helpbox/synopsis information:\n");
     multiReader mr(argc,argv);
     args = mr.getargs();
     
     init(args);//program dies here after printing info, if a match is found
     fprintf(stderr,"\nUnknown argument supplied: \'%s\'\n\n",argv[1]);
     printProgInfo(stderr);
     exit(0);//important otherwise the abc classes will try to clean up, which doesnt make sense in this context
     //     return 0;
   }

   multiReader *mr= new multiReader(argc,argv);
   args = mr->getargs();

   

   init(args);
   //   fprintf(stderr,"adsfadsf\n");exit(0);
   parseArgStruct(args);

   //Below is main loop which will run until nomore data
   assert(args->hd);
   assert(args->revMap);
   int lastRefId=-1;
   while(SIG_COND) {
     funkyPars *tmp = mr->fetch(); //YES MISTER FETCH
      if(tmp==NULL)
	break;
     if(lastRefId==-1||lastRefId!=tmp->refId){
       lastRefId=tmp->refId;
       void changeChr(int refId);//<-ugly is located in shared.cpp, to force a change of chr, when notusing bamfiles
       changeChr(lastRefId);
     }
    
     selector(tmp);
      
   }

     


   //now we have no more data lets wait and cleanup

  fprintf(stderr,"\t-> Done reading data waiting for calculations to finish\n");

  destroy_shared();

  
  //printout the filenames generated
  fprintf(stderr,"\t-> Output filenames:\n");
  for(int i=0;i<(int)dumpedFiles.size();i++){
    fprintf(stderr,"\t\t->\"%s\"\n",dumpedFiles[i]);
    fprintf(args->argumentFile,"\t\t->\"%s\"\n",dumpedFiles[i]);
    free(dumpedFiles[i]);
  }
  // fprintf(stderr,"\n");
  fprintf(args->argumentFile,"\n");
  // fprintf(stderr,"\t");
  printTime(stderr);

  
  
  //print out nice messages
  
  fprintf(stderr,"\t-> Arguments and parameters for all analysis are located in .arg file\n");
  fprintf(stderr, "\t[ALL done] cpu-time used =  %.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
  fprintf(stderr, "\t[ALL done] walltime used =  %.2f sec\n", (float)(time(NULL) - t2));  
  fprintf(args->argumentFile, "\t[ALL done] cpu-time used =  %.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
  fprintf(args->argumentFile, "\t[ALL done] walltime used =  %.2f sec\n", (float)(time(NULL) - t2));  
  
  //make better below
  extern int *bamSortedIds;
  delete [] bamSortedIds;
  
  delete mr;

  return 0;
}
Exemplo n.º 24
0
void version(const char **bcftools_version, const char **htslib_version)
{
    *bcftools_version = BCFTOOLS_VERSION;
    *htslib_version = hts_version();
}
Exemplo n.º 25
0
//last is from abc.h
void print_bcf_header(htsFile *fp,bcf_hdr_t *hdr,argStruct *args,kstring_t &buf,const bam_hdr_t *bhdr){
  assert(args);
  ksprintf(&buf, "##angsdVersion=%s+htslib-%s\n",angsd_version(),hts_version());
  bcf_hdr_append(hdr, buf.s);
  
  buf.l = 0;
  ksprintf(&buf, "##angsdCommand=");
  for (int i=1; i<args->argc; i++)
    ksprintf(&buf, " %s", args->argv[i]);
  kputc('\n', &buf);
  bcf_hdr_append(hdr, buf.s);
  buf.l=0;

  if(args->ref){
    buf.l = 0;
    ksprintf(&buf, "##reference=file://%s\n", args->ref);
    bcf_hdr_append(hdr,buf.s);
  }
  if (args->anc){
    buf.l = 0;
    ksprintf(&buf, "##ancestral=file://%s\n", args->anc);
    bcf_hdr_append(hdr,buf.s);
  }

  // Translate BAM @SQ tags to BCF ##contig tags
  // todo: use/write new BAM header manipulation routines, fill also UR, M5
  
  for (int i=0; i<bhdr->n_targets; i++){
    buf.l = 0; 
    ksprintf(&buf, "##contig=<ID=%s,length=%d>", bhdr->target_name[i], bhdr->target_len[i]);
    bcf_hdr_append(hdr, buf.s);
  }
  buf.l = 0;
  bcf_hdr_append(hdr,"##ALT=<ID=*,Description=\"Represents allele(s) other than observed.\">");
  bcf_hdr_append(hdr,"##INFO=<ID=INDEL,Number=0,Type=Flag,Description=\"Indicates that the variant is an INDEL.\">");
  bcf_hdr_append(hdr,"##INFO=<ID=IDV,Number=1,Type=Integer,Description=\"Maximum number of raw reads supporting an indel\">");
  bcf_hdr_append(hdr,"##INFO=<ID=IMF,Number=1,Type=Float,Description=\"Maximum fraction of raw reads supporting an indel\">");
  bcf_hdr_append(hdr,"##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Raw read depth\">");
  bcf_hdr_append(hdr,"##INFO=<ID=VDB,Number=1,Type=Float,Description=\"Variant Distance Bias for filtering splice-site artefacts in RNA-seq data (bigger is better)\",Version=\"3\">");
  bcf_hdr_append(hdr,"##INFO=<ID=RPB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Read Position Bias (bigger is better)\">");
  bcf_hdr_append(hdr,"##INFO=<ID=MQB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality Bias (bigger is better)\">");
  bcf_hdr_append(hdr,"##INFO=<ID=BQB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Base Quality Bias (bigger is better)\">");
  bcf_hdr_append(hdr,"##INFO=<ID=MQSB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality vs Strand Bias (bigger is better)\">");
  bcf_hdr_append(hdr,"##INFO=<ID=NS,Number=1,Type=Integer,Description=\"Number of Samples With Data\">");
  bcf_hdr_append(hdr,"##INFO=<ID=RPB2,Number=1,Type=Float,Description=\"Mann-Whitney U test of Read Position Bias [CDF] (bigger is better)\">");
  bcf_hdr_append(hdr,"##INFO=<ID=MQB2,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality Bias [CDF] (bigger is better)\">");
  bcf_hdr_append(hdr,"##INFO=<ID=BQB2,Number=1,Type=Float,Description=\"Mann-Whitney U test of Base Quality Bias [CDF] (bigger is better)\">");
  bcf_hdr_append(hdr,"##INFO=<ID=MQSB2,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality vs Strand Bias [CDF] (bigger is better)\">");
  bcf_hdr_append(hdr,"##INFO=<ID=SGB,Number=1,Type=Float,Description=\"Segregation based metric.\">");
  bcf_hdr_append(hdr,"##INFO=<ID=MQ0F,Number=1,Type=Float,Description=\"Fraction of MQ0 reads (smaller is better)\">");
  bcf_hdr_append(hdr,"##INFO=<ID=I16,Number=16,Type=Float,Description=\"Auxiliary tag used for calling, see description of bcf_callret1_t in bam2bcf.h\">");
  bcf_hdr_append(hdr,"##INFO=<ID=QS,Number=R,Type=Float,Description=\"Auxiliary tag used for calling\">");
  bcf_hdr_append(hdr,"##INFO=<ID=AF,Number=A,Type=Float,Description=\"Minor Allele Frequency\">");
  bcf_hdr_append(hdr,"##INFO=<ID=DPR,Number=R,Type=Integer,Description=\"Number of high-quality bases observed for each allele\">");
  bcf_hdr_append(hdr,"##INFO=<ID=AD,Number=R,Type=Integer,Description=\"Total allelic depths\">");
  bcf_hdr_append(hdr,"##INFO=<ID=ADF,Number=R,Type=Integer,Description=\"Total allelic depths on the forward strand\">");
  bcf_hdr_append(hdr,"##INFO=<ID=ADR,Number=R,Type=Integer,Description=\"Total allelic depths on the reverse strand\">");
  bcf_hdr_append(hdr,"##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">");
  bcf_hdr_append(hdr,"##FORMAT=<ID=DP4,Number=4,Type=Integer,Description=\"Number of high-quality ref-fwd, ref-reverse, alt-fwd and alt-reverse bases\">");
  bcf_hdr_append(hdr,"##FORMAT=<ID=SP,Number=1,Type=Integer,Description=\"Phred-scaled strand bias P-value\">");
  bcf_hdr_append(hdr,"##FORMAT=<ID=AD,Number=R,Type=Integer,Description=\"Allelic depths\">");
  bcf_hdr_append(hdr,"##FORMAT=<ID=ADF,Number=R,Type=Integer,Description=\"Allelic depths on the forward strand\">");
  bcf_hdr_append(hdr,"##FORMAT=<ID=ADR,Number=R,Type=Integer,Description=\"Allelic depths on the reverse strand\">");
  bcf_hdr_append(hdr,"##FORMAT=<ID=PL,Number=G,Type=Integer,Description=\"List of Phred-scaled genotype likelihoods\">");
  bcf_hdr_append(hdr,"##FORMAT=<ID=DP,Number=1,Type=Integer,Description=\"Number of high-quality bases\">");
  bcf_hdr_append(hdr,"##FORMAT=<ID=DV,Number=1,Type=Integer,Description=\"Number of high-quality non-reference bases\">");
  bcf_hdr_append(hdr,"##FORMAT=<ID=DPR,Number=R,Type=Integer,Description=\"Number of high-quality bases observed for each allele\">");
  bcf_hdr_append(hdr,"##FORMAT=<ID=GL,Number=G,Type=Float,Description=\"scaled Genotype Likelihoods (loglikeratios to the most likely (in log10))\">");
  //fprintf(stderr,"samples from sm:%d\n",args->sm->n);
  for(int i=0;i<args->sm->n;i++){
    bcf_hdr_add_sample(hdr, args->sm->smpl[i]);
  }
  bcf_hdr_add_sample(hdr, NULL);      // to update internal structures
  
  if ( bcf_hdr_write(fp, hdr)!=0 )
    fprintf(stderr,"Failed to write bcf\n");
  buf.l=0;
}
Exemplo n.º 26
0
static void init_data(args_t *args)
{
    bcf_srs_t *files = bcf_sr_init();
    if ( args->regions_list )
    {
        if ( bcf_sr_set_regions(files, args->regions_list, args->regions_is_file)<0 )
            error("Failed to read the regions: %s\n", args->regions_list);
    }
    if ( args->targets_list )
    {
        if ( bcf_sr_set_targets(files, args->targets_list, args->targets_is_file, 0)<0 )
            error("Failed to read the targets: %s\n", args->targets_list);
    }
    if ( !bcf_sr_add_reader(files, args->fname) ) error("Failed to open %s: %s\n", args->fname,bcf_sr_strerror(files->errnum));
    bcf_hdr_t *hdr = files->readers[0].header;
    if ( !args->sample )
    {
        if ( bcf_hdr_nsamples(hdr)>1 ) error("Missing the option -s, --sample\n");
        args->sample = hdr->samples[0];
    }
    else if ( bcf_hdr_id2int(hdr,BCF_DT_SAMPLE,args->sample)<0 ) error("No such sample: %s\n", args->sample);
    int ret = bcf_hdr_set_samples(hdr, args->sample, 0);
    if ( ret<0 ) error("Error setting the sample: %s\n", args->sample);

    if ( !bcf_hdr_idinfo_exists(hdr,BCF_HL_FMT,bcf_hdr_id2int(hdr,BCF_DT_ID,"BAF")) )
        error("The tag FORMAT/BAF is not present in the VCF: %s\n", args->fname);

    int i;
    args->xvals = (double*) calloc(args->nbins,sizeof(double));
    for (i=0; i<args->nbins; i++) args->xvals[i] = 1.0*i/(args->nbins-1);

    // collect BAF distributions for all chromosomes
    int idist = -1, nbaf = 0, nprocessed = 0, ntotal = 0, prev_chr = -1;
    float *baf = NULL;
    while ( bcf_sr_next_line(files) )
    {
        ntotal++;

        bcf1_t *line = bcf_sr_get_line(files,0);
        if ( bcf_get_format_float(hdr,line,"BAF",&baf,&nbaf) != 1 ) continue;
        if ( bcf_float_is_missing(baf[0]) ) continue;

        nprocessed++;

        if ( prev_chr==-1 || prev_chr!=line->rid )
        {
            // new chromosome
            idist = args->ndist++;
            args->dist = (dist_t*) realloc(args->dist, sizeof(dist_t)*args->ndist);
            memset(&args->dist[idist],0,sizeof(dist_t));
            args->dist[idist].chr   = strdup(bcf_seqname(hdr,line));
            args->dist[idist].yvals = (double*) calloc(args->nbins,sizeof(double));
            args->dist[idist].xvals = args->xvals;
            args->dist[idist].nvals = args->nbins;
            prev_chr = line->rid;
        }
        int bin = baf[0]*(args->nbins-1);
        args->dist[idist].yvals[bin]++;   // the distribution
    }
    free(baf);
    bcf_sr_destroy(files);

    for (idist=0; idist<args->ndist; idist++)
    {
        #if 0
            int j;
            for (j=0; j<args->nbins; j++)
            {
                double x = args->dist[idist].xvals[j];
                args->dist[idist].yvals[j] = exp(-(x-0.5)*(x-0.5)/1e-3);
            }
        #endif
        init_dist(args, &args->dist[idist],args->verbose);
    }

    args->dat_fp = open_file(&args->dat_fname,"w","%s/dist.dat", args->output_dir);
    fprintf(args->dat_fp, "# This file was produced by: bcftools polysomy(%s+htslib-%s), the command line was:\n", bcftools_version(),hts_version());
    fprintf(args->dat_fp, "# \t bcftools %s ", args->argv[0]);
    for (i=1; i<args->argc; i++)
        fprintf(args->dat_fp, " %s",args->argv[i]);
    fprintf(args->dat_fp,"\n#\n");
    fprintf(args->dat_fp,"# DIST\t[2]Chrom\t[3]BAF\t[4]Normalized Count\n");
    fprintf(args->dat_fp,"# FIT\t[2]Goodness of Fit\t[3]iFrom\t[4]iTo\t[5]The Fitted Function\n");
    fprintf(args->dat_fp,"# CN\t[2]Chrom\t[3]Estimated Copy Number\t[4]Absolute fit deviation\n");

    char *fname = NULL;
    FILE *fp = open_file(&fname,"w","%s/dist.py", args->output_dir);
//-------- matplotlib script --------------
    fprintf(fp,
        "#!/usr/bin/env python\n"
        "#\n"
        "import matplotlib as mpl\n"
        "mpl.use('Agg')\n"
        "import matplotlib.pyplot as plt\n"
        "import csv,sys,argparse\n"
        "from math import exp\n"
        "\n"
        "outdir = '%s'\n"
        "\n"
        "def read_dat(dat,fit,cn):\n"
        "   csv.register_dialect('tab', delimiter='\t', quoting=csv.QUOTE_NONE)\n"
        "   with open(outdir+'/dist.dat', 'rb') as f:\n"
        "      reader = csv.reader(f, 'tab')\n"
        "      for row in reader:\n"
        "          if row[0][0]=='#': continue\n"
        "          type = row[0]\n"
        "          chr  = row[1]\n"
        "          if type=='DIST':\n"
        "              if chr not in dat: dat[chr] = []\n"
        "              dat[chr].append(row)\n"
        "          elif type=='FIT':\n"
        "              if chr not in fit: fit[chr] = []\n"
        "              fit[chr].append(row)\n"
        "          elif type=='CN':\n"
        "              cn[chr] = row[2]\n"
        "\n"
        "def plot_dist(dat,fit,chr):\n"
        "   fig, ax = plt.subplots(1, 1, figsize=(7,5))\n"
        "   ax.plot([x[2] for x in dat[chr]],[x[3] for x in dat[chr]],'k-',label='Distribution')\n"
        "   if chr in fit:\n"
        "       for i in range(len(fit[chr])):\n"
        "           pfit = fit[chr][i]\n"
        "           exec('def xfit(x): return '+pfit[5])\n"
        "           istart = int(pfit[3])\n"
        "           iend   = int(pfit[4])+1\n"
        "           vals   = dat[chr][istart:iend]\n"
        "           args   = {}\n"
        "           if i==0: args = {'label':'Target to Fit'}\n"
        "           ax.plot([x[2] for x in vals],[x[3] for x in vals],'r-',**args)\n"
        "           if i==0: args = {'label':'Best Fit'}\n"
        "           ax.plot([x[2] for x in vals],[xfit(float(x[2])) for x in vals],'g-',**args)\n"
        "   ax.set_title('BAF distribution, chr'+chr)\n"
        "   ax.set_xlabel('BAF')\n"
        "   ax.set_ylabel('Frequency')\n"
        "   ax.legend(loc='best',prop={'size':7},frameon=False)\n"
        "   plt.savefig(outdir+'/dist.chr'+chr+'.png')\n"
        "   plt.close()\n"
        "\n"
        "def plot_copy_number(cn):\n"
        "   fig, ax = plt.subplots(1, 1, figsize=(7,5))\n"
        "   xlabels = sorted(cn.keys())\n"
        "   xvals = range(len(xlabels))\n"
        "   yvals = [float(cn[x]) for x in xlabels]\n"
        "   ax.plot(xvals,yvals,'o',color='red')\n"
        "   for i in range(len(xvals)):\n"
        "       if yvals[i]==-1: ax.annotate('?', xy=(xvals[i],0.5),va='center',ha='center',color='red',fontweight='bold')\n"
        "   ax.tick_params(axis='both', which='major', labelsize=9)\n"
        "   ax.set_xticks(xvals)\n"
        "   ax.set_xticklabels(xlabels,rotation=45)\n"
        "   ax.set_xlim(-1,len(xlabels))\n"
        "   ax.set_ylim(0,5.0)\n"
        "   ax.set_yticks([1.0,2.0,3.0,4.0])\n"
        "   ax.set_xlabel('Chromosome')\n"
        "   ax.set_ylabel('Copy Number')\n"
        "   plt.savefig(outdir+'/copy-number.png')\n"
        "   plt.close()\n"
        "\n"
        "class myParser(argparse.ArgumentParser):\n"
        "   def error(self, message):\n"
        "       self.print_help()\n"
        "       sys.stderr.write('error: %%s\\n' %% message)\n"
        "       sys.exit(2)\n"
        "\n"
        "def main():\n"
        "   parser = myParser()\n"
        "   parser.add_argument('-a', '--all', action='store_true', help='Create all plots')\n"
        "   parser.add_argument('-c', '--copy-number', action='store_true', help='Create copy-number plot')\n"
        "   parser.add_argument('-d', '--distrib', metavar='CHR', help='Plot BAF distribution of a single chromosome')\n"
        "   args = parser.parse_args()\n"
        "   dat = {}; fit = {}; cn = {}\n"
        "   read_dat(dat,fit,cn)\n"
        "   if args.distrib!=None:\n"
        "       plot_dist(dat,fit,args.distrib)\n"
        "   if args.all:\n"
        "       for chr in dat: plot_dist(dat,fit,chr)\n"
        "       plot_copy_number(cn)\n"
        "   elif args.copy_number:\n"
        "       plot_copy_number(cn)\n"
        "   else:\n"
        "       for chr in dat: plot_dist(dat,fit,chr)\n"
        "\n"
        "if __name__ == '__main__':\n"
        "   main()\n",
        args->output_dir);
//---------------------------------------
    chmod(fname, S_IWUSR|S_IRUSR|S_IRGRP|S_IROTH|S_IXUSR|S_IXGRP|S_IXOTH);
    free(fname);
    fclose(fp);
}
Exemplo n.º 27
0
/*
 * Performs pileup
 * @param conf configuration for this pileup
 * @param n number of files specified in fn
 * @param fn filenames
 */
static int mpileup(mplp_conf_t *conf, int n, char **fn)
{
    extern void *bcf_call_add_rg(void *rghash, const char *hdtext, const char *list);
    extern void bcf_call_del_rghash(void *rghash);
    mplp_aux_t **data;
    int i, tid, pos, *n_plp, tid0 = -1, beg0 = 0, end0 = 1u<<29, ref_len, ref_tid = -1, max_depth, max_indel_depth;
    const bam_pileup1_t **plp;
    bam_mplp_t iter;
    bam_hdr_t *h = NULL; /* header of first file in input list */
    char *ref;
    void *rghash = NULL;
    FILE *pileup_fp = NULL;

    bcf_callaux_t *bca = NULL;
    bcf_callret1_t *bcr = NULL;
    bcf_call_t bc;
    htsFile *bcf_fp = NULL;
    bcf_hdr_t *bcf_hdr = NULL;

    bam_sample_t *sm = NULL;
    kstring_t buf;
    mplp_pileup_t gplp;

    memset(&gplp, 0, sizeof(mplp_pileup_t));
    memset(&buf, 0, sizeof(kstring_t));
    memset(&bc, 0, sizeof(bcf_call_t));
    data = calloc(n, sizeof(mplp_aux_t*));
    plp = calloc(n, sizeof(bam_pileup1_t*));
    n_plp = calloc(n, sizeof(int));
    sm = bam_smpl_init();

    if (n == 0) {
        fprintf(stderr,"[%s] no input file/data given\n", __func__);
        exit(1);
    }

    // read the header of each file in the list and initialize data
    for (i = 0; i < n; ++i) {
        bam_hdr_t *h_tmp;
        data[i] = calloc(1, sizeof(mplp_aux_t));
        data[i]->fp = sam_open(fn[i], "rb");
        if ( !data[i]->fp )
        {
            fprintf(stderr, "[%s] failed to open %s: %s\n", __func__, fn[i], strerror(errno));
            exit(1);
        }
        hts_set_fai_filename(data[i]->fp, conf->fai_fname);
        data[i]->conf = conf;
        h_tmp = sam_hdr_read(data[i]->fp);
        if ( !h_tmp ) {
            fprintf(stderr,"[%s] fail to read the header of %s\n", __func__, fn[i]);
            exit(1);
        }
        data[i]->h = i? h : h_tmp; // for i==0, "h" has not been set yet
        bam_smpl_add(sm, fn[i], (conf->flag&MPLP_IGNORE_RG)? 0 : h_tmp->text);
        // Collect read group IDs with PL (platform) listed in pl_list (note: fragile, strstr search)
        rghash = bcf_call_add_rg(rghash, h_tmp->text, conf->pl_list);
        if (conf->reg) {
            hts_idx_t *idx = sam_index_load(data[i]->fp, fn[i]);
            if (idx == 0) {
                fprintf(stderr, "[%s] fail to load index for %s\n", __func__, fn[i]);
                exit(1);
            }
            if ( (data[i]->iter=sam_itr_querys(idx, data[i]->h, conf->reg)) == 0) {
                fprintf(stderr, "[E::%s] fail to parse region '%s'\n", __func__, conf->reg);
                exit(1);
            }
            if (i == 0) tid0 = data[i]->iter->tid, beg0 = data[i]->iter->beg, end0 = data[i]->iter->end;
            hts_idx_destroy(idx);
        }
        if (i == 0) h = h_tmp; /* save the header of first file in list */
        else {
            // FIXME: to check consistency
            bam_hdr_destroy(h_tmp);
        }
    }
    // allocate data storage proportionate to number of samples being studied sm->n
    gplp.n = sm->n;
    gplp.n_plp = calloc(sm->n, sizeof(int));
    gplp.m_plp = calloc(sm->n, sizeof(int));
    gplp.plp = calloc(sm->n, sizeof(bam_pileup1_t*));

    fprintf(stderr, "[%s] %d samples in %d input files\n", __func__, sm->n, n);
    // write the VCF header
    if (conf->flag & MPLP_BCF)
    {
        const char *mode;
        if ( conf->flag & MPLP_VCF )
            mode = (conf->flag&MPLP_NO_COMP)? "wu" : "wz";   // uncompressed VCF or compressed VCF
        else
            mode = (conf->flag&MPLP_NO_COMP)? "wub" : "wb";  // uncompressed BCF or compressed BCF

        bcf_fp = bcf_open(conf->output_fname? conf->output_fname : "-", mode);
        if (bcf_fp == NULL) {
            fprintf(stderr, "[%s] failed to write to %s: %s\n", __func__, conf->output_fname? conf->output_fname : "standard output", strerror(errno));
            exit(1);
        }

        bcf_hdr = bcf_hdr_init("w");
        kstring_t str = {0,0,0};

        ksprintf(&str, "##samtoolsVersion=%s+htslib-%s\n",samtools_version(),hts_version());
        bcf_hdr_append(bcf_hdr, str.s);

        str.l = 0;
        ksprintf(&str, "##samtoolsCommand=samtools mpileup");
        for (i=1; i<conf->argc; i++) ksprintf(&str, " %s", conf->argv[i]);
        kputc('\n', &str);
        bcf_hdr_append(bcf_hdr, str.s);

        if (conf->fai_fname)
        {
            str.l = 0;
            ksprintf(&str, "##reference=file://%s\n", conf->fai_fname);
            bcf_hdr_append(bcf_hdr, str.s);
        }

        // todo: use/write new BAM header manipulation routines, fill also UR, M5
        for (i=0; i<h->n_targets; i++)
        {
            str.l = 0;
            ksprintf(&str, "##contig=<ID=%s,length=%d>", h->target_name[i], h->target_len[i]);
            bcf_hdr_append(bcf_hdr, str.s);
        }
        free(str.s);
        bcf_hdr_append(bcf_hdr,"##ALT=<ID=X,Description=\"Represents allele(s) other than observed.\">");
        bcf_hdr_append(bcf_hdr,"##INFO=<ID=INDEL,Number=0,Type=Flag,Description=\"Indicates that the variant is an INDEL.\">");
        bcf_hdr_append(bcf_hdr,"##INFO=<ID=IDV,Number=1,Type=Integer,Description=\"Maximum number of reads supporting an indel\">");
        bcf_hdr_append(bcf_hdr,"##INFO=<ID=IMF,Number=1,Type=Float,Description=\"Maximum fraction of reads supporting an indel\">");
        bcf_hdr_append(bcf_hdr,"##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Raw read depth\">");
        bcf_hdr_append(bcf_hdr,"##INFO=<ID=VDB,Number=1,Type=Float,Description=\"Variant Distance Bias for filtering splice-site artefacts in RNA-seq data (bigger is better)\",Version=\"3\">");
        bcf_hdr_append(bcf_hdr,"##INFO=<ID=RPB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Read Position Bias (bigger is better)\">");
        bcf_hdr_append(bcf_hdr,"##INFO=<ID=MQB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality Bias (bigger is better)\">");
        bcf_hdr_append(bcf_hdr,"##INFO=<ID=BQB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Base Quality Bias (bigger is better)\">");
        bcf_hdr_append(bcf_hdr,"##INFO=<ID=MQSB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality vs Strand Bias (bigger is better)\">");
#if CDF_MWU_TESTS
        bcf_hdr_append(bcf_hdr,"##INFO=<ID=RPB2,Number=1,Type=Float,Description=\"Mann-Whitney U test of Read Position Bias [CDF] (bigger is better)\">");
        bcf_hdr_append(bcf_hdr,"##INFO=<ID=MQB2,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality Bias [CDF] (bigger is better)\">");
        bcf_hdr_append(bcf_hdr,"##INFO=<ID=BQB2,Number=1,Type=Float,Description=\"Mann-Whitney U test of Base Quality Bias [CDF] (bigger is better)\">");
        bcf_hdr_append(bcf_hdr,"##INFO=<ID=MQSB2,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality vs Strand Bias [CDF] (bigger is better)\">");
#endif
        bcf_hdr_append(bcf_hdr,"##INFO=<ID=SGB,Number=1,Type=Float,Description=\"Segregation based metric.\">");
        bcf_hdr_append(bcf_hdr,"##INFO=<ID=MQ0F,Number=1,Type=Float,Description=\"Fraction of MQ0 reads (smaller is better)\">");
        bcf_hdr_append(bcf_hdr,"##INFO=<ID=I16,Number=16,Type=Float,Description=\"Auxiliary tag used for calling, see description of bcf_callret1_t in bam2bcf.h\">");
        bcf_hdr_append(bcf_hdr,"##INFO=<ID=QS,Number=R,Type=Float,Description=\"Auxiliary tag used for calling\">");
        bcf_hdr_append(bcf_hdr,"##FORMAT=<ID=PL,Number=G,Type=Integer,Description=\"List of Phred-scaled genotype likelihoods\">");
        if ( conf->fmt_flag&B2B_FMT_DP )
            bcf_hdr_append(bcf_hdr,"##FORMAT=<ID=DP,Number=1,Type=Integer,Description=\"Number of high-quality bases\">");
        if ( conf->fmt_flag&B2B_FMT_DV )
            bcf_hdr_append(bcf_hdr,"##FORMAT=<ID=DV,Number=1,Type=Integer,Description=\"Number of high-quality non-reference bases\">");
        if ( conf->fmt_flag&B2B_FMT_DPR )
            bcf_hdr_append(bcf_hdr,"##FORMAT=<ID=DPR,Number=R,Type=Integer,Description=\"Number of high-quality bases observed for each allele\">");
        if ( conf->fmt_flag&B2B_INFO_DPR )
            bcf_hdr_append(bcf_hdr,"##INFO=<ID=DPR,Number=R,Type=Integer,Description=\"Number of high-quality bases observed for each allele\">");
        if ( conf->fmt_flag&B2B_FMT_DP4 )
            bcf_hdr_append(bcf_hdr,"##FORMAT=<ID=DP4,Number=4,Type=Integer,Description=\"Number of high-quality ref-fwd, ref-reverse, alt-fwd and alt-reverse bases\">");
        if ( conf->fmt_flag&B2B_FMT_SP )
            bcf_hdr_append(bcf_hdr,"##FORMAT=<ID=SP,Number=1,Type=Integer,Description=\"Phred-scaled strand bias P-value\">");

        for (i=0; i<sm->n; i++)
            bcf_hdr_add_sample(bcf_hdr, sm->smpl[i]);
        bcf_hdr_add_sample(bcf_hdr, NULL);
        bcf_hdr_write(bcf_fp, bcf_hdr);

        bca = bcf_call_init(-1., conf->min_baseQ);
        bcr = calloc(sm->n, sizeof(bcf_callret1_t));
        bca->rghash = rghash;
        bca->openQ = conf->openQ, bca->extQ = conf->extQ, bca->tandemQ = conf->tandemQ;
        bca->min_frac = conf->min_frac;
        bca->min_support = conf->min_support;
        bca->per_sample_flt = conf->flag & MPLP_PER_SAMPLE;

        bc.bcf_hdr = bcf_hdr;
        bc.n = sm->n;
        bc.PL = malloc(15 * sm->n * sizeof(*bc.PL));
        if (conf->fmt_flag)
        {
            assert( sizeof(float)==sizeof(int32_t) );
            bc.DP4 = malloc(sm->n * sizeof(int32_t) * 4);
            bc.fmt_arr = malloc(sm->n * sizeof(float)); // all fmt_flag fields
            if ( conf->fmt_flag&(B2B_INFO_DPR|B2B_FMT_DPR) )
            {
                // first B2B_MAX_ALLELES fields for total numbers, the rest per-sample
                bc.DPR = malloc((sm->n+1)*B2B_MAX_ALLELES*sizeof(int32_t));
                for (i=0; i<sm->n; i++)
                    bcr[i].DPR = bc.DPR + (i+1)*B2B_MAX_ALLELES;
            }
        }
    }
    else {
        pileup_fp = conf->output_fname? fopen(conf->output_fname, "w") : stdout;

        if (pileup_fp == NULL) {
            fprintf(stderr, "[%s] failed to write to %s: %s\n", __func__, conf->output_fname, strerror(errno));
            exit(1);
        }
    }

    if (tid0 >= 0 && conf->fai) { // region is set
        ref = faidx_fetch_seq(conf->fai, h->target_name[tid0], 0, 0x7fffffff, &ref_len);
        ref_tid = tid0;
        for (i = 0; i < n; ++i) data[i]->ref = ref, data[i]->ref_id = tid0;
    } else ref_tid = -1, ref = 0;

    // begin pileup
    iter = bam_mplp_init(n, mplp_func, (void**)data);
    if ( conf->flag & MPLP_SMART_OVERLAPS ) bam_mplp_init_overlaps(iter);
    max_depth = conf->max_depth;
    if (max_depth * sm->n > 1<<20)
        fprintf(stderr, "(%s) Max depth is above 1M. Potential memory hog!\n", __func__);
    if (max_depth * sm->n < 8000) {
        max_depth = 8000 / sm->n;
        fprintf(stderr, "<%s> Set max per-file depth to %d\n", __func__, max_depth);
    }
    max_indel_depth = conf->max_indel_depth * sm->n;
    bam_mplp_set_maxcnt(iter, max_depth);
    bcf1_t *bcf_rec = bcf_init1();
    int ret;
    while ( (ret=bam_mplp_auto(iter, &tid, &pos, n_plp, plp)) > 0) {
        if (conf->reg && (pos < beg0 || pos >= end0)) continue; // out of the region requested
        if (conf->bed && tid >= 0 && !bed_overlap(conf->bed, h->target_name[tid], pos, pos+1)) continue;
        if (tid != ref_tid) {
            free(ref); ref = 0;
            if (conf->fai) ref = faidx_fetch_seq(conf->fai, h->target_name[tid], 0, 0x7fffffff, &ref_len);
            for (i = 0; i < n; ++i) data[i]->ref = ref, data[i]->ref_id = tid;
            ref_tid = tid;
        }
        if (conf->flag & MPLP_BCF) {
            int total_depth, _ref0, ref16;
            for (i = total_depth = 0; i < n; ++i) total_depth += n_plp[i];
            group_smpl(&gplp, sm, &buf, n, fn, n_plp, plp, conf->flag & MPLP_IGNORE_RG);
            _ref0 = (ref && pos < ref_len)? ref[pos] : 'N';
            ref16 = seq_nt16_table[_ref0];
            bcf_callaux_clean(bca, &bc);
            for (i = 0; i < gplp.n; ++i)
                bcf_call_glfgen(gplp.n_plp[i], gplp.plp[i], ref16, bca, bcr + i);
            bc.tid = tid; bc.pos = pos;
            bcf_call_combine(gplp.n, bcr, bca, ref16, &bc);
            bcf_clear1(bcf_rec);
            bcf_call2bcf(&bc, bcf_rec, bcr, conf->fmt_flag, 0, 0);
            bcf_write1(bcf_fp, bcf_hdr, bcf_rec);
            // call indels; todo: subsampling with total_depth>max_indel_depth instead of ignoring?
            if (!(conf->flag&MPLP_NO_INDEL) && total_depth < max_indel_depth && bcf_call_gap_prep(gplp.n, gplp.n_plp, gplp.plp, pos, bca, ref, rghash) >= 0)
            {
                bcf_callaux_clean(bca, &bc);
                for (i = 0; i < gplp.n; ++i)
                    bcf_call_glfgen(gplp.n_plp[i], gplp.plp[i], -1, bca, bcr + i);
                if (bcf_call_combine(gplp.n, bcr, bca, -1, &bc) >= 0) {
                    bcf_clear1(bcf_rec);
                    bcf_call2bcf(&bc, bcf_rec, bcr, conf->fmt_flag, bca, ref);
                    bcf_write1(bcf_fp, bcf_hdr, bcf_rec);
                }
            }
        } else {
            fprintf(pileup_fp, "%s\t%d\t%c", h->target_name[tid], pos + 1, (ref && pos < ref_len)? ref[pos] : 'N');
            for (i = 0; i < n; ++i) {
                int j, cnt;
                for (j = cnt = 0; j < n_plp[i]; ++j) {
                    const bam_pileup1_t *p = plp[i] + j;
                    if (bam_get_qual(p->b)[p->qpos] >= conf->min_baseQ) ++cnt;
                }
                fprintf(pileup_fp, "\t%d\t", cnt);
                if (n_plp[i] == 0) {
                    fputs("*\t*", pileup_fp);
                    if (conf->flag & MPLP_PRINT_MAPQ) fputs("\t*", pileup_fp);
                    if (conf->flag & MPLP_PRINT_POS) fputs("\t*", pileup_fp);
                } else {
                    for (j = 0; j < n_plp[i]; ++j) {
                        const bam_pileup1_t *p = plp[i] + j;
                        if (bam_get_qual(p->b)[p->qpos] >= conf->min_baseQ)
                            pileup_seq(pileup_fp, plp[i] + j, pos, ref_len, ref);
                    }
                    putc('\t', pileup_fp);
                    for (j = 0; j < n_plp[i]; ++j) {
                        const bam_pileup1_t *p = plp[i] + j;
                        int c = bam_get_qual(p->b)[p->qpos];
                        if (c >= conf->min_baseQ) {
                            c = c + 33 < 126? c + 33 : 126;
                            putc(c, pileup_fp);
                        }
                    }
                    if (conf->flag & MPLP_PRINT_MAPQ) {
                        putc('\t', pileup_fp);
                        for (j = 0; j < n_plp[i]; ++j) {
                            const bam_pileup1_t *p = plp[i] + j;
                            int c = bam_get_qual(p->b)[p->qpos];
                            if ( c < conf->min_baseQ ) continue;
                            c = plp[i][j].b->core.qual + 33;
                            if (c > 126) c = 126;
                            putc(c, pileup_fp);
                        }
                    }
                    if (conf->flag & MPLP_PRINT_POS) {
                        putc('\t', pileup_fp);
                        for (j = 0; j < n_plp[i]; ++j) {
                            if (j > 0) putc(',', pileup_fp);
                            fprintf(pileup_fp, "%d", plp[i][j].qpos + 1); // FIXME: printf() is very slow...
                        }
                    }
                }
            }
            putc('\n', pileup_fp);
        }
    }

    // clean up
    free(bc.tmp.s);
    bcf_destroy1(bcf_rec);
    if (bcf_fp)
    {
        hts_close(bcf_fp);
        bcf_hdr_destroy(bcf_hdr);
        bcf_call_destroy(bca);
        free(bc.PL);
        free(bc.DP4);
        free(bc.DPR);
        free(bc.fmt_arr);
        free(bcr);
    }
    if (pileup_fp && conf->output_fname) fclose(pileup_fp);
    bam_smpl_destroy(sm); free(buf.s);
    for (i = 0; i < gplp.n; ++i) free(gplp.plp[i]);
    free(gplp.plp); free(gplp.n_plp); free(gplp.m_plp);
    bcf_call_del_rghash(rghash);
    bam_mplp_destroy(iter);
    bam_hdr_destroy(h);
    for (i = 0; i < n; ++i) {
        sam_close(data[i]->fp);
        if (data[i]->iter) hts_itr_destroy(data[i]->iter);
        free(data[i]);
    }
    free(data); free(plp); free(ref); free(n_plp);
    return ret;
}