Beispiel #1
0
/**
 * Entry point of the 'filter' sub-command (two-pass variant).
 *
 * Parses the command line into a filter_conf_t, validates the option
 * combination, optionally runs a first pass over the input VCF to apply
 * multiple testing correction (MTC), and then streams the variants from
 * the input VCF to the output VCF, tagging (or dropping) filtered ones.
 *
 * @param argc  argument count as received by the top-level program;
 *              argv[0] is the program name and argv[1] the sub-command,
 *              which is why getopt_long() is fed argc-1 / argv+1
 * @param argv  argument vector matching argc
 *
 * @return 0 on success or after printing the help text; non-zero
 *         (1 or -1) on invalid arguments, I/O problems or failed MTC.
 */
int
main_filter(int argc, char *argv[])
{
     filter_conf_t cfg;
     char *vcf_in = NULL, *vcf_out = NULL;
     /* flags are static because their addresses are stored in the
      * static long_opts table below */
     static int print_only_passed = 1;
     static int sb_filter_no_compound = 0;
     static int sb_filter_incl_indels = 0;
     static int only_indels = 0;
     static int only_snvs = 0;
     char *vcf_header = NULL;
     mtc_qual_t *mtc_quals = NULL;
     long int num_vars = 0;
     static int no_defaults = 0;
     /* index of the current variant in the second pass; used to look up
      * the matching mtc_quals entry from the first pass */
     long int var_idx = -1;

     /* default filter options: -1 marks "not set" for min/max */
     memset(&cfg, 0, sizeof(filter_conf_t));
     cfg.dp_filter.min = cfg.dp_filter.max = -1;
     cfg.af_filter.min = cfg.af_filter.max = -1;
     cfg.sb_filter.alpha = DEFAULT_SIG;
     cfg.snvqual_filter.alpha = DEFAULT_SIG;
     cfg.indelqual_filter.alpha = DEFAULT_SIG;


    /* keep in sync with long_opts_str and usage
     *
     * getopt is a pain in the whole when it comes to syncing of long
     * and short args and usage. check out gopt, libcfu...
     */
    while (1) {
         int c;
         static struct option long_opts[] = {
              /* see usage sync */
              {"verbose", no_argument, &verbose, 1},
              {"debug", no_argument, &debug, 1},
              {"print-all", no_argument, &print_only_passed, 0},
              {"no-defaults", no_argument, &no_defaults, 1},
              {"only-indels", no_argument, &only_indels, 1},
              {"only-snvs", no_argument, &only_snvs, 1},

              {"help", no_argument, NULL, 'h'},
              {"in", required_argument, NULL, 'i'},
              {"out", required_argument, NULL, 'o'},

              {"cov-min", required_argument, NULL, 'v'},
              {"cov-max", required_argument, NULL, 'V'},

              {"af-min", required_argument, NULL, 'a'},
              {"af-max", required_argument, NULL, 'A'},

              {"sb-thresh", required_argument, NULL, 'B'},
              {"sb-mtc", required_argument, NULL, 'b'},
              {"sb-alpha", required_argument, NULL, 'c'},
              {"sb-no-compound", no_argument, &sb_filter_no_compound, 1},
              {"sb-incl-indels", no_argument, &sb_filter_incl_indels, 1},

              {"snvqual-thresh", required_argument, NULL, 'Q'},
              {"snvqual-mtc", required_argument, NULL, 'q'},
              {"snvqual-alpha", required_argument, NULL, 'r'},
              {"snvqual-ntests", required_argument, NULL, 's'},

              {"indelqual-thresh", required_argument, NULL, 'K'},
              {"indelqual-mtc", required_argument, NULL, 'k'},
              {"indelqual-alpha", required_argument, NULL, 'l'},
              {"indelqual-ntests", required_argument, NULL, 'm'},

              {0, 0, 0, 0} /* sentinel */
         };

         /* keep in sync with long_opts and usage */
         static const char *long_opts_str = "hi:o:v:V:a:A:B:b:c:Q:q:r:s:K:k:l:m:";

         /* getopt_long stores the option index here. */
         int long_opts_index = 0;
         c = getopt_long(argc-1, argv+1, /* skipping 'lofreq', just leaving 'command', i.e. call */
                         long_opts_str, long_opts, & long_opts_index);
         if (c == -1) {
              break;
         }

         /* Numeric options are only sanity-checked on their first
          * character. The cast to unsigned char is required because
          * passing a negative char value to isdigit() is undefined. */
         switch (c) {
         /* keep in sync with long_opts etc */
         case 'h':
              usage(& cfg);
              return 0;

         case 'i':
              free(vcf_in); /* option may be repeated; last one wins */
              vcf_in = strdup(optarg);
              if (! vcf_in) {
                   LOG_FATAL("%s\n", "Out of memory while storing input file name");
                   return 1;
              }
              break;
         case 'o':
              if (0 != strcmp(optarg, "-")) {
                   if (file_exists(optarg)) {
                        LOG_FATAL("Cowardly refusing to overwrite file '%s'. Exiting...\n", optarg);
                        return 1;
                   }
              }
              free(vcf_out); /* option may be repeated; last one wins */
              vcf_out = strdup(optarg);
              if (! vcf_out) {
                   LOG_FATAL("%s\n", "Out of memory while storing output file name");
                   return 1;
              }
              break;

         case 'v':
              if (! isdigit((unsigned char) optarg[0])) {
                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
                   return -1;
              }
              cfg.dp_filter.min = atoi(optarg);
              break;
         case 'V':
              if (! isdigit((unsigned char) optarg[0])) {
                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
                   return -1;
              }
              cfg.dp_filter.max = atoi(optarg);
              break;

         case 'a':
              if (! isdigit((unsigned char) optarg[0])) {
                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
                   return -1;
              }
              cfg.af_filter.min = strtof(optarg, NULL);
              break;
         case 'A':
              if (! isdigit((unsigned char) optarg[0])) {
                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
                   return -1;
              }
              cfg.af_filter.max = strtof(optarg, NULL);
              break;

         case 'B':
              if (! isdigit((unsigned char) optarg[0])) {
                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
                   return -1;
              }
              cfg.sb_filter.thresh = atoi(optarg);
              break;
         case 'b':
              cfg.sb_filter.mtc_type = mtc_str_to_type(optarg);
              if (-1 == cfg.sb_filter.mtc_type) {
                   LOG_FATAL("Unknown multiple testing correction type '%s' for strandbias filtering\n", optarg);
                   return -1;
              }
              break;
         case 'c':
              if (! isdigit((unsigned char) optarg[0])) {
                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
                   return -1;
              }
              cfg.sb_filter.alpha = strtof(optarg, NULL);
              break;

         case 'Q':
              if (! isdigit((unsigned char) optarg[0])) {
                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
                   return -1;
              }
              cfg.snvqual_filter.thresh = atoi(optarg);
              break;
         case 'q':
              cfg.snvqual_filter.mtc_type = mtc_str_to_type(optarg);
              if (-1 == cfg.snvqual_filter.mtc_type) {
                   LOG_FATAL("Unknown multiple testing correction type '%s' for snv quality filtering\n", optarg);
                   return -1;
              }
              break;
         case 'r':
              if (! isdigit((unsigned char) optarg[0])) {
                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
                   return -1;
              }
              cfg.snvqual_filter.alpha = strtof(optarg, NULL);
              break;
         case 's':
              if (! isdigit((unsigned char) optarg[0])) {
                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
                   return -1;
              }
              cfg.snvqual_filter.ntests = atol(optarg);
              break;

         case 'K':
              if (! isdigit((unsigned char) optarg[0])) {
                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
                   return -1;
              }
              cfg.indelqual_filter.thresh = atoi(optarg);
              break;
         case 'k':
              cfg.indelqual_filter.mtc_type = mtc_str_to_type(optarg);
              if (-1 == cfg.indelqual_filter.mtc_type) {
                   LOG_FATAL("Unknown multiple testing correction type '%s' for indel quality filtering\n", optarg);
                   return -1;
              }
              break;
         case 'l':
              if (! isdigit((unsigned char) optarg[0])) {
                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
                   return -1;
              }
              cfg.indelqual_filter.alpha = strtof(optarg, NULL);
              break;
         case 'm':
              if (! isdigit((unsigned char) optarg[0])) {
                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
                   return -1;
              }
              cfg.indelqual_filter.ntests = atol(optarg);
              break;

         case '?':
              LOG_FATAL("%s\n", "Unrecognized argument found. Exiting...\n");
              return 1;

         default:
              /* flag-only long options (getopt_long returned 0) */
              break;
         }
    }
    /* copy the getopt flag targets into the configuration */
    cfg.print_only_passed = print_only_passed;
    cfg.only_indels = only_indels;
    cfg.only_snvs = only_snvs;
    cfg.sb_filter.no_compound = sb_filter_no_compound;
    cfg.sb_filter.incl_indels = sb_filter_incl_indels;

    if (cfg.only_indels && cfg.only_snvs) {
         LOG_FATAL("%s\n", "Can't keep only indels and only snvs");
         return 1;
    }
    
    /* defaults are only applied to settings the user left untouched */
    if (! no_defaults) {
         if (cfg.sb_filter.mtc_type==MTC_NONE && ! cfg.sb_filter.thresh) {
              LOG_VERBOSE("%s\n", "Setting default SB filtering method to FDR");
              cfg.sb_filter.mtc_type = MTC_FDR;
              cfg.sb_filter.alpha = 0.001;
         }
         if (cfg.dp_filter.min<0) {
              cfg.dp_filter.min = 10;
              LOG_VERBOSE("Setting default minimum coverage to %d\n", cfg.dp_filter.min);
         }
    } else {
         LOG_VERBOSE("%s\n", "Skipping default settings");
    }

    /* any argument left over after option parsing is an error */
    if (0 != argc - optind - 1) {/* FIXME needed at all? */
         LOG_FATAL("%s\n", "Unrecognized argument found. Exiting...\n");
         return 1;
    }

    /* logic check of command line parameters
     */
    if (cfg.dp_filter.max > 0 &&  cfg.dp_filter.max < cfg.dp_filter.min) {
         LOG_FATAL("%s\n", "Invalid coverage-filter settings");
         return 1;
    }
    if ((cfg.af_filter.max > 0 && cfg.af_filter.max < cfg.af_filter.min) ||
        (cfg.af_filter.max > 1.0)) {
         LOG_FATAL("%s\n", "Invalid AF-filter settings");
         return 1;
    }

    /* a fixed threshold and MTC are mutually exclusive per filter */
    if (cfg.sb_filter.thresh && cfg.sb_filter.mtc_type != MTC_NONE) {
         LOG_FATAL("%s\n", "Can't use fixed strand-bias threshold *and* multiple testing correction.");
         return 1;
    }
    if (cfg.snvqual_filter.thresh && cfg.snvqual_filter.mtc_type != MTC_NONE) {
         LOG_FATAL("%s\n", "Can't use fixed SNV quality threshold *and* multiple testing correction.");
         return 1;
    }
    if (cfg.indelqual_filter.thresh && cfg.indelqual_filter.mtc_type != MTC_NONE) {
         LOG_FATAL("%s\n", "Can't use fixed indel quality threshold *and* multiple testing correction.");
         return 1;
    }

    /* called with the sub-command only: show usage */
    if (argc == 2) {
        fprintf(stderr, "\n");
        usage(& cfg);
        return 1;
    }

    if (debug) {
         dump_filter_conf(& cfg);
     }

    /* The input file is mandatory: no streaming allowed for vcf_in
     * since we need to determine thresholds first. A missing output
     * file defaults to "-".
     */
    if  (! vcf_in) {
         LOG_FATAL("%s\n", "Input VCF missing. No streaming allowed. Need to determine auto threshold in memory friendly manner first.");
         return 1;
    }
    if  (! vcf_out) {
         vcf_out = strdup("-");
         if (! vcf_out) {
              LOG_FATAL("%s\n", "Out of memory while storing output file name");
              return 1;
         }
    }
    LOG_DEBUG("vcf_in=%s vcf_out=%s\n", vcf_in, vcf_out);



    /* First pass parsing to get qualities for MTC computation (if needed)
     */
    if (cfg.sb_filter.mtc_type != MTC_NONE || cfg.snvqual_filter.mtc_type != MTC_NONE || cfg.indelqual_filter.mtc_type != MTC_NONE) {
#ifdef TRACE
         long int i = 0;
#endif
         LOG_VERBOSE("%s\n", "At least one type of multiple testing correction requested. Doing first pass of vcf");

         if ((num_vars = mtc_quals_from_vcf_file(& mtc_quals, vcf_in)) < 0) {
              LOG_ERROR("Couldn't parse %s\n", vcf_in);
              return 1;
         }

         if (cfg.sb_filter.mtc_type != MTC_NONE) {
              if (apply_sb_filter_mtc(mtc_quals, & cfg.sb_filter, num_vars)) {
                   LOG_FATAL("%s\n", "Multiple testing correction on strand-bias pvalues failed");
                   return -1;
              }
         }
         if (cfg.indelqual_filter.mtc_type != MTC_NONE) {
              if (apply_indelqual_filter_mtc(mtc_quals, & cfg.indelqual_filter, num_vars)) {
                   LOG_FATAL("%s\n", "Multiple testing correction on indel quality pvalues failed");
                   return -1;
              }
         }
         if (cfg.snvqual_filter.mtc_type != MTC_NONE) {
              if (apply_snvqual_filter_mtc(mtc_quals, & cfg.snvqual_filter, num_vars)) {
                   LOG_FATAL("%s\n", "Multiple testing correction on SNV quality pvalues failed");
                   return -1;
              }
         }
#ifdef TRACE
         for (i=0; i<num_vars; i++) {
              LOG_WARN("mtc_quals #%ld sb_qual=%d var_qual=%d is_indel=%d\n", 
                       i, mtc_quals[i].sb_qual, mtc_quals[i].var_qual, mtc_quals[i].is_indel);
         }
#endif
         LOG_VERBOSE("%s\n", "MTC application completed");
    } else {
         LOG_VERBOSE("%s\n", "No multiple testing correction requested. First pass of vcf skipped");

    }

    
    if (vcf_file_open(& cfg.vcf_in, vcf_in,
                      HAS_GZIP_EXT(vcf_in), 'r')) {
         LOG_ERROR("Couldn't open %s\n", vcf_in);
         return 1;
    }
    if (vcf_file_open(& cfg.vcf_out, vcf_out,
                      HAS_GZIP_EXT(vcf_out), 'w')) {
         LOG_ERROR("Couldn't open %s\n", vcf_out);
         return 1;
    }
    /* file names are no longer needed once both files are open */
    free(vcf_in);
    free(vcf_out);

    /* print header; if the input has none that can be parsed, rewind so
     * that variant parsing starts at the beginning of the file
     */
    if (0 !=  vcf_parse_header(&vcf_header, & cfg.vcf_in)) {
         /* LOG_WARN("%s\n", "vcf_parse_header() failed"); */
         if (vcf_file_seek(& cfg.vcf_in, 0, SEEK_SET)) {
              LOG_FATAL("%s\n", "Couldn't rewind file to parse variants"
                        " after header parsing failed");
              return -1;
         }
    }
    /* also sets filter names */
    cfg_filter_to_vcf_header(& cfg, &vcf_header);
    vcf_write_header(& cfg.vcf_out, vcf_header);
    free(vcf_header);


    /* read in variants (second pass)
     */
    while (1) {
         var_t *var;
         int rc;
         int is_indel = 0;

         vcf_new_var(&var);
         rc = vcf_parse_var(& cfg.vcf_in, var);
         if (rc) {
              /* how to distinguish between error and EOF? */
              vcf_free_var(&var); /* don't leak the unused variant */
              break;
         }
         /* incremented before any skipping so that it keeps matching
          * the entry order of mtc_quals */
         var_idx += 1;

         is_indel = vcf_var_is_indel(var);

         if (cfg.only_snvs && is_indel) {
              vcf_free_var(&var);
              continue;
         } else if (cfg.only_indels && ! is_indel) {
              vcf_free_var(&var);
              continue;
         }


         /* filters applying to all types of variants
          */
         apply_af_filter(var, & cfg.af_filter);
         apply_dp_filter(var, & cfg.dp_filter);

         /* quality threshold per variant type
          *
          * NOTE(review): the SNV/indel branches below flag a variant
          * when var_qual != -1, whereas the strand-bias branch further
          * down flags on sb_qual == -1. Confirm against the
          * apply_*_filter_mtc() implementations which polarity is
          * intended.
          */
         if (! is_indel) {
              if (cfg.snvqual_filter.thresh) {
                   assert(cfg.snvqual_filter.mtc_type == MTC_NONE);
                   apply_snvqual_threshold(var, & cfg.snvqual_filter);
              } else if (cfg.snvqual_filter.mtc_type != MTC_NONE) {
                   if (mtc_quals[var_idx].var_qual != -1) {
                        vcf_var_add_to_filter(var, cfg.snvqual_filter.id);
                   }
              }

         } else {
              if (cfg.indelqual_filter.thresh) {
                   assert(cfg.indelqual_filter.mtc_type == MTC_NONE);
                   apply_indelqual_threshold(var, & cfg.indelqual_filter);
              } else if (cfg.indelqual_filter.mtc_type != MTC_NONE) {
                   if (mtc_quals[var_idx].var_qual != -1) {
                        vcf_var_add_to_filter(var, cfg.indelqual_filter.id);
                   }
              }
         }
         
         /* sb filter: applied to indels only if explicitly requested
          */
         if (cfg.sb_filter.thresh) {
              if (! is_indel || cfg.sb_filter.incl_indels) {
                   assert(cfg.sb_filter.mtc_type == MTC_NONE);
                   apply_sb_threshold(var, & cfg.sb_filter);
              }
         } else if (cfg.sb_filter.mtc_type != MTC_NONE) {
              if (! is_indel || cfg.sb_filter.incl_indels) {
                   if (mtc_quals[var_idx].sb_qual == -1) {
                        vcf_var_add_to_filter(var, cfg.sb_filter.id);
                   }
              }              
         }
         

         /* output
          */
         if (cfg.print_only_passed && ! (VCF_VAR_PASSES(var))) {
              vcf_free_var(&var);
              continue;
         }

         /* add pass if no filters were set */
         if (! var->filter || strlen(var->filter)<=1) {
              char pass_str[] = "PASS";
              if (var->filter) {
                   free(var->filter);
              }
              var->filter = strdup(pass_str);
         }

         vcf_write_var(& cfg.vcf_out, var);
         vcf_free_var(&var);

         /* flush periodically */
         if (var_idx%1000==0) {
              (void) vcf_file_flush(& cfg.vcf_out);
         }
    }

    vcf_file_close(& cfg.vcf_in);
    vcf_file_close(& cfg.vcf_out);

    free(mtc_quals);

    LOG_VERBOSE("%s\n", "Successful exit.");

    return 0;
}
Beispiel #2
0
/**
 * Entry point of the 'filter' sub-command (in-memory variant).
 *
 * Parses the command line into a filter_conf_t, validates the option
 * combination, loads all variants of the input VCF into memory while
 * applying the simple (threshold based) filters, then applies the
 * filters that need multiple testing correction (MTC) across all
 * variants and finally writes the result to the output VCF.
 *
 * @param argc  argument count as received by the top-level program;
 *              argv[0] is the program name and argv[1] the sub-command,
 *              which is why getopt_long() is fed argc-1 / argv+1
 * @param argv  argument vector matching argc
 *
 * @return 0 on success or after printing the help text; non-zero
 *         (1 or -1) on invalid arguments, I/O problems, memory
 *         exhaustion or failed MTC.
 */
int
main_filter(int argc, char *argv[])
{
     filter_conf_t cfg;
     char *vcf_in = NULL, *vcf_out = NULL;
     /* flags are static because their addresses are stored in the
      * static long_opts table below */
     static int print_only_passed = 1;
     static int sb_filter_no_compound = 0;
     static int sb_filter_incl_indels = 0;
     static int only_indels = 0;
     static int only_snvs = 0;
     char *vcf_header = NULL;
     var_t **vars = NULL;
     long int num_vars = 0; /* isn't long overkill here ? */
     long int vars_size = 0; /* keeping track of how much memory we've got pre-allocated */
     long int i;
     static int no_defaults = 0;

     /* default filter options: -1 marks "not set" for min/max */
     memset(&cfg, 0, sizeof(filter_conf_t));
     cfg.dp_filter.min = cfg.dp_filter.max = -1;
     cfg.af_filter.min = cfg.af_filter.max = -1;
     cfg.sb_filter.alpha = DEFAULT_SIG;
     cfg.snvqual_filter.alpha = DEFAULT_SIG;
     cfg.indelqual_filter.alpha = DEFAULT_SIG;


    /* keep in sync with long_opts_str and usage
     *
     * getopt is a pain in the whole when it comes to syncing of long
     * and short args and usage. check out gopt, libcfu...
     */
    while (1) {
         int c;
         static struct option long_opts[] = {
              /* see usage sync */
              {"verbose", no_argument, &verbose, 1},
              {"debug", no_argument, &debug, 1},
              {"print-all", no_argument, &print_only_passed, 0},
              {"no-defaults", no_argument, &no_defaults, 1},
              {"only-indels", no_argument, &only_indels, 1},
              {"only-snvs", no_argument, &only_snvs, 1},

              {"help", no_argument, NULL, 'h'},
              {"in", required_argument, NULL, 'i'},
              {"out", required_argument, NULL, 'o'},

              {"cov-min", required_argument, NULL, 'v'},
              {"cov-max", required_argument, NULL, 'V'},

              {"af-min", required_argument, NULL, 'a'},
              {"af-max", required_argument, NULL, 'A'},

              {"sb-thresh", required_argument, NULL, 'B'},
              {"sb-mtc", required_argument, NULL, 'b'},
              {"sb-alpha", required_argument, NULL, 'c'},
              {"sb-no-compound", no_argument, &sb_filter_no_compound, 1},
              {"sb-incl-indels", no_argument, &sb_filter_incl_indels, 1},

              {"snvqual-thresh", required_argument, NULL, 'Q'},
              {"snvqual-mtc", required_argument, NULL, 'q'},
              {"snvqual-alpha", required_argument, NULL, 'r'},
              {"snvqual-ntests", required_argument, NULL, 's'},

              {"indelqual-thresh", required_argument, NULL, 'K'},
              {"indelqual-mtc", required_argument, NULL, 'k'},
              {"indelqual-alpha", required_argument, NULL, 'l'},
              {"indelqual-ntests", required_argument, NULL, 'm'},

              {0, 0, 0, 0} /* sentinel */
         };

         /* keep in sync with long_opts and usage */
         static const char *long_opts_str = "hi:o:v:V:a:A:B:b:c:Q:q:r:s:K:k:l:m:";

         /* getopt_long stores the option index here. */
         int long_opts_index = 0;
         c = getopt_long(argc-1, argv+1, /* skipping 'lofreq', just leaving 'command', i.e. call */
                         long_opts_str, long_opts, & long_opts_index);
         if (c == -1) {
              break;
         }

         /* Numeric options are only sanity-checked on their first
          * character. The cast to unsigned char is required because
          * passing a negative char value to isdigit() is undefined. */
         switch (c) {
         /* keep in sync with long_opts etc */
         case 'h':
              usage(& cfg);
              return 0;

         case 'i':
              free(vcf_in); /* option may be repeated; last one wins */
              vcf_in = strdup(optarg);
              if (! vcf_in) {
                   LOG_FATAL("%s\n", "Out of memory while storing input file name");
                   return 1;
              }
              break;
         case 'o':
              if (0 != strcmp(optarg, "-")) {
                   if (file_exists(optarg)) {
                        LOG_FATAL("Cowardly refusing to overwrite file '%s'. Exiting...\n", optarg);
                        return 1;
                   }
              }
              free(vcf_out); /* option may be repeated; last one wins */
              vcf_out = strdup(optarg);
              if (! vcf_out) {
                   LOG_FATAL("%s\n", "Out of memory while storing output file name");
                   return 1;
              }
              break;

         case 'v':
              if (! isdigit((unsigned char) optarg[0])) {
                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
                   return -1;
              }
              cfg.dp_filter.min = atoi(optarg);
              break;
         case 'V':
              if (! isdigit((unsigned char) optarg[0])) {
                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
                   return -1;
              }
              cfg.dp_filter.max = atoi(optarg);
              break;

         case 'a':
              if (! isdigit((unsigned char) optarg[0])) {
                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
                   return -1;
              }
              cfg.af_filter.min = strtof(optarg, NULL);
              break;
         case 'A':
              if (! isdigit((unsigned char) optarg[0])) {
                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
                   return -1;
              }
              cfg.af_filter.max = strtof(optarg, NULL);
              break;

         case 'B':
              if (! isdigit((unsigned char) optarg[0])) {
                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
                   return -1;
              }
              cfg.sb_filter.thresh = atoi(optarg);
              break;
         case 'b':
              cfg.sb_filter.mtc_type = mtc_str_to_type(optarg);
              if (-1 == cfg.sb_filter.mtc_type) {
                   LOG_FATAL("Unknown multiple testing correction type '%s' for strandbias filtering\n", optarg);
                   return -1;
              }
              break;
         case 'c':
              if (! isdigit((unsigned char) optarg[0])) {
                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
                   return -1;
              }
              cfg.sb_filter.alpha = strtof(optarg, NULL);
              break;

         case 'Q':
              if (! isdigit((unsigned char) optarg[0])) {
                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
                   return -1;
              }
              cfg.snvqual_filter.thresh = atoi(optarg);
              break;
         case 'q':
              cfg.snvqual_filter.mtc_type = mtc_str_to_type(optarg);
              if (-1 == cfg.snvqual_filter.mtc_type) {
                   LOG_FATAL("Unknown multiple testing correction type '%s' for snv quality filtering\n", optarg);
                   return -1;
              }
              break;
         case 'r':
              if (! isdigit((unsigned char) optarg[0])) {
                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
                   return -1;
              }
              cfg.snvqual_filter.alpha = strtof(optarg, NULL);
              break;
         case 's':
              if (! isdigit((unsigned char) optarg[0])) {
                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
                   return -1;
              }
              cfg.snvqual_filter.ntests = atol(optarg);
              break;

         case 'K':
              if (! isdigit((unsigned char) optarg[0])) {
                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
                   return -1;
              }
              cfg.indelqual_filter.thresh = atoi(optarg);
              break;
         case 'k':
              cfg.indelqual_filter.mtc_type = mtc_str_to_type(optarg);
              if (-1 == cfg.indelqual_filter.mtc_type) {
                   LOG_FATAL("Unknown multiple testing correction type '%s' for indel quality filtering\n", optarg);
                   return -1;
              }
              break;
         case 'l':
              if (! isdigit((unsigned char) optarg[0])) {
                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
                   return -1;
              }
              cfg.indelqual_filter.alpha = strtof(optarg, NULL);
              break;
         case 'm':
              if (! isdigit((unsigned char) optarg[0])) {
                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
                   return -1;
              }
              cfg.indelqual_filter.ntests = atol(optarg);
              break;

         case '?':
              LOG_FATAL("%s\n", "Unrecognized argument found. Exiting...\n");
              return 1;

         default:
              /* flag-only long options (getopt_long returned 0) */
              break;
         }
    }
    /* copy the getopt flag targets into the configuration */
    cfg.print_only_passed = print_only_passed;
    cfg.only_indels = only_indels;
    cfg.only_snvs = only_snvs;
    cfg.sb_filter.no_compound = sb_filter_no_compound;
    cfg.sb_filter.incl_indels = sb_filter_incl_indels;

    if (cfg.only_indels && cfg.only_snvs) {
         LOG_FATAL("%s\n", "Can't keep only indels and only snvs");
         return 1;
    }
    
    /* defaults are only applied to settings the user left untouched */
    if (! no_defaults) {
         if (cfg.sb_filter.mtc_type==MTC_NONE && ! cfg.sb_filter.thresh) {
              LOG_VERBOSE("%s\n", "Setting default SB filtering method to FDR");
              cfg.sb_filter.mtc_type = MTC_FDR;
              cfg.sb_filter.alpha = 0.001;
         }
         if (cfg.dp_filter.min<0) {
              cfg.dp_filter.min = 10;
              LOG_VERBOSE("Setting default minimum coverage to %d\n", cfg.dp_filter.min);
         }
    } else {
         LOG_VERBOSE("%s\n", "Skipping default settings");
    }

    /* any argument left over after option parsing is an error */
    if (0 != argc - optind - 1) {/* FIXME needed at all? */
         LOG_FATAL("%s\n", "Unrecognized argument found. Exiting...\n");
         return 1;
    }

    /* logic check of command line parameters
     */
    if (cfg.dp_filter.max > 0 &&  cfg.dp_filter.max < cfg.dp_filter.min) {
         LOG_FATAL("%s\n", "Invalid coverage-filter settings");
         return 1;
    }
    if ((cfg.af_filter.max > 0 && cfg.af_filter.max < cfg.af_filter.min) ||
        (cfg.af_filter.max > 1.0)) {
         LOG_FATAL("%s\n", "Invalid AF-filter settings");
         return 1;
    }

    /* a fixed threshold and MTC are mutually exclusive per filter */
    if (cfg.sb_filter.thresh && cfg.sb_filter.mtc_type != MTC_NONE) {
         LOG_FATAL("%s\n", "Can't use fixed strand-bias threshold *and* multiple testing correction.");
         return 1;
    }
    if (cfg.snvqual_filter.thresh && cfg.snvqual_filter.mtc_type != MTC_NONE) {
         LOG_FATAL("%s\n", "Can't use fixed SNV quality threshold *and* multiple testing correction.");
         return 1;
    }
    if (cfg.indelqual_filter.thresh && cfg.indelqual_filter.mtc_type != MTC_NONE) {
         LOG_FATAL("%s\n", "Can't use fixed indel quality threshold *and* multiple testing correction.");
         return 1;
    }

    /* called with the sub-command only: show usage */
    if (argc == 2) {
        fprintf(stderr, "\n");
        usage(& cfg);
        return 1;
    }

    if (debug) {
          dump_filter_conf(& cfg);
     }

    /* missing file args default to stdin and stdout
     */
    if  (! vcf_in) {
         vcf_in = strdup("-");
         if (! vcf_in) {
              LOG_FATAL("%s\n", "Out of memory while storing input file name");
              return 1;
         }
    }
    if  (! vcf_out) {
         vcf_out = strdup("-");
         if (! vcf_out) {
              LOG_FATAL("%s\n", "Out of memory while storing output file name");
              return 1;
         }
    }
    LOG_DEBUG("vcf_in=%s vcf_out=%s\n", vcf_in, vcf_out);


    /* open vcf files
     */
    if (vcf_file_open(& cfg.vcf_in, vcf_in,
                      HAS_GZIP_EXT(vcf_in), 'r')) {
         LOG_ERROR("Couldn't open %s\n", vcf_in);
         return 1;
    }
    if (vcf_file_open(& cfg.vcf_out, vcf_out,
                      HAS_GZIP_EXT(vcf_out), 'w')) {
         LOG_ERROR("Couldn't open %s\n", vcf_out);
         return 1;
    }
    /* file names are no longer needed once both files are open */
    free(vcf_in);
    free(vcf_out);

    /* FIXME everything below here should go into a function with args:
       - cfg
       - ...what else?
    */

    /* print header; if the input has none that can be parsed, rewind so
     * that variant parsing starts at the beginning of the file
     */
    if (0 !=  vcf_parse_header(&vcf_header, & cfg.vcf_in)) {
         /* LOG_WARN("%s\n", "vcf_parse_header() failed"); */
         if (vcf_file_seek(& cfg.vcf_in, 0, SEEK_SET)) {
              LOG_FATAL("%s\n", "Couldn't rewind file to parse variants"
                        " after header parsing failed");
              return -1;
         }
    }
    /* also sets filter names */
    cfg_filter_to_vcf_header(& cfg, &vcf_header);
    vcf_write_header(& cfg.vcf_out, vcf_header);
    free(vcf_header);


    /* read in variants. since many filters perform multiple testing
     * correction and therefore need to look at all variants we keep
     * it simple and load them all into memory. 
     * 
     * in theory we could apply all 'simple' filters directly within
     * the loop here and depending on the result spit the variant out
     * or not. only complex filters need to see all variants first to,
     * e.g. apply multiple testing.
     */
    num_vars = 0;
    while (1) {
         var_t *var;
         int rc;
         int is_indel = 0;

         vcf_new_var(&var);
         rc = vcf_parse_var(& cfg.vcf_in, var);
         if (rc) {
              /* how to distinguish between error and EOF? */
              /* release with vcf_free_var(), the same routine used for
               * the stored variants at the end of this function */
              vcf_free_var(&var);
              break;
         }

         is_indel = vcf_var_is_indel(var);

         if (cfg.only_snvs && is_indel) {
              vcf_free_var(&var);
              continue;
         } else if (cfg.only_indels && ! is_indel) {
              vcf_free_var(&var);
              continue;
         }

         /* read all in, no matter if already filtered. we keep adding filters */
         if (num_vars + 1 >= vars_size) {
              const long incr = 128;
              /* grow via a temporary so that the already stored
               * variants stay reachable if realloc() fails */
              void *grown = realloc(vars, (vars_size+incr) * sizeof(var_t*));
              if (! grown) {
                   LOG_FATAL("%s\n", "Out of memory while storing variants");
                   vcf_free_var(&var);
                   for (i=0; i<num_vars; i++) {
                        vcf_free_var(& vars[i]);
                   }
                   free(vars);
                   return -1;
              }
              vars = grown;
              vars_size += incr;
         }
         vars[num_vars] = var;
         num_vars += 1;
#ifdef TRACE
         {
              char *key;
              vcf_var_key(&key,  vars[num_vars-1]);
              fprintf(stderr, "storing var %ld+1: %s\n", num_vars, key);
              free(key);
         }
#endif

         /* filters applying to all types of variants
          */
         apply_af_filter(var, & cfg.af_filter);
         apply_dp_filter(var, & cfg.dp_filter);

         /* quality threshold per variant type
          */
         if (! is_indel) {
              if (cfg.snvqual_filter.thresh) {
                   assert(cfg.snvqual_filter.mtc_type == MTC_NONE);
                   apply_snvqual_threshold(var, & cfg.snvqual_filter);
              }

         } else {
              if (cfg.indelqual_filter.thresh) {
                   assert(cfg.indelqual_filter.mtc_type == MTC_NONE);
                   apply_indelqual_threshold(var, & cfg.indelqual_filter);
              }
         }
         
         /* sb threshold: applied to indels only if explicitly requested */
         if (cfg.sb_filter.thresh) {
              if (! is_indel || cfg.sb_filter.incl_indels) {
                   assert(cfg.sb_filter.mtc_type == MTC_NONE);
                   apply_sb_threshold(var, & cfg.sb_filter);
              }
         }
    }

    /* trim the array to its used size; on failure the original (larger)
     * allocation remains valid, so simply keep it */
    if (num_vars) {
         void *shrunk = realloc(vars, (num_vars * sizeof(var_t*)));
         if (shrunk) {
              vars = shrunk;
         }
    }
    vcf_file_close(& cfg.vcf_in);
    LOG_VERBOSE("Parsed %ld variants\n", num_vars);


    /* filters that need to see all variants at once */
    if (cfg.sb_filter.mtc_type != MTC_NONE) {
         if (apply_sb_filter_mtc(& cfg.sb_filter, vars, num_vars)) {
              LOG_FATAL("%s\n", "Multiple testing correction on strand-bias pvalues failed");
              return -1;
         }
    }

    if (cfg.snvqual_filter.mtc_type != MTC_NONE) {
         if (apply_snvqual_filter_mtc(& cfg.snvqual_filter, vars, num_vars)) {
              LOG_FATAL("%s\n", "Multiple testing correction on SNV qualities failed");
              return -1;
         }
    }

    if (cfg.indelqual_filter.mtc_type != MTC_NONE) {
         if (apply_indelqual_filter_mtc(& cfg.indelqual_filter, vars, num_vars)) {
              LOG_FATAL("%s\n", "Multiple testing correction on Indel qualities failed");
              return -1;
         }
    }

    /* output
     */
    for (i=0; i<num_vars; i++) {
         var_t *v = vars[i];

         if (cfg.print_only_passed && ! (VCF_VAR_PASSES(v))) {
              continue;
         }

         /* add pass if no filters were set */
         if (! v->filter || strlen(v->filter)<=1) {
              char pass_str[] = "PASS";
              if (v->filter) {
                   free(v->filter);
              }
              v->filter = strdup(pass_str);
         }

         vcf_write_var(& cfg.vcf_out, v);
    }
    vcf_file_close(& cfg.vcf_out);


    /* release all stored variants, printed or not */
    for (i=0; i<num_vars; i++) {
         vcf_free_var(& vars[i]);
    }
    free(vars);

    LOG_VERBOSE("%s\n", "Successful exit.");

    return 0;
}