int main_filter(int argc, char *argv[]) { filter_conf_t cfg; char *vcf_in = NULL, *vcf_out = NULL; static int print_only_passed = 1; static int sb_filter_no_compound = 0; static int sb_filter_incl_indels = 0; static int only_indels = 0; static int only_snvs = 0; char *vcf_header = NULL; mtc_qual_t *mtc_quals = NULL; long int num_vars; static int no_defaults = 0; long int var_idx = -1; /* default filter options */ memset(&cfg, 0, sizeof(filter_conf_t)); cfg.dp_filter.min = cfg.dp_filter.max = -1; cfg.af_filter.min = cfg.af_filter.max = -1; cfg.sb_filter.alpha = DEFAULT_SIG; cfg.snvqual_filter.alpha = DEFAULT_SIG; cfg.indelqual_filter.alpha = DEFAULT_SIG; /* keep in sync with long_opts_str and usage * * getopt is a pain in the whole when it comes to syncing of long * and short args and usage. check out gopt, libcfu... */ while (1) { int c; static struct option long_opts[] = { /* see usage sync */ {"verbose", no_argument, &verbose, 1}, {"debug", no_argument, &debug, 1}, {"print-all", no_argument, &print_only_passed, 0}, {"no-defaults", no_argument, &no_defaults, 1}, {"only-indels", no_argument, &only_indels, 1}, {"only-snvs", no_argument, &only_snvs, 1}, {"help", no_argument, NULL, 'h'}, {"in", required_argument, NULL, 'i'}, {"out", required_argument, NULL, 'o'}, {"cov-min", required_argument, NULL, 'v'}, {"cov-max", required_argument, NULL, 'V'}, {"af-min", required_argument, NULL, 'a'}, {"af-max", required_argument, NULL, 'A'}, {"sb-thresh", required_argument, NULL, 'B'}, {"sb-mtc", required_argument, NULL, 'b'}, {"sb-alpha", required_argument, NULL, 'c'}, {"sb-no-compound", no_argument, &sb_filter_no_compound, 1}, {"sb-incl-indels", no_argument, &sb_filter_incl_indels, 1}, {"snvqual-thresh", required_argument, NULL, 'Q'}, {"snvqual-mtc", required_argument, NULL, 'q'}, {"snvqual-alpha", required_argument, NULL, 'r'}, {"snvqual-ntests", required_argument, NULL, 's'}, {"indelqual-thresh", required_argument, NULL, 'K'}, {"indelqual-mtc", required_argument, NULL, 'k'}, {"indelqual-alpha", required_argument, NULL, 'l'}, {"indelqual-ntests", required_argument, NULL, 'm'}, {0, 0, 0, 0} /* sentinel */ }; /* keep in sync with long_opts and usage */ static const char *long_opts_str = "hi:o:v:V:a:A:B:b:c:Q:q:r:s:K:k:l:m:"; /* getopt_long stores the option index here. */ int long_opts_index = 0; c = getopt_long(argc-1, argv+1, /* skipping 'lofreq', just leaving 'command', i.e. call */ long_opts_str, long_opts, & long_opts_index); if (c == -1) { break; } switch (c) { /* keep in sync with long_opts etc */ case 'h': usage(& cfg); return 0; case 'i': vcf_in = strdup(optarg); break; case 'o': if (0 != strcmp(optarg, "-")) { if (file_exists(optarg)) { LOG_FATAL("Cowardly refusing to overwrite file '%s'. Exiting...\n", optarg); return 1; } } vcf_out = strdup(optarg); break; case 'v': if (! isdigit(optarg[0])) { LOG_FATAL("Non-numeric argument provided: %s\n", optarg); return -1; } cfg.dp_filter.min = atoi(optarg); break; case 'V': if (! isdigit(optarg[0])) { LOG_FATAL("Non-numeric argument provided: %s\n", optarg); return -1; } cfg.dp_filter.max = atoi(optarg); break; case 'a': if (! isdigit(optarg[0])) { LOG_FATAL("Non-numeric argument provided: %s\n", optarg); return -1; } cfg.af_filter.min = strtof(optarg, NULL); break; case 'A': if (! isdigit(optarg[0])) { LOG_FATAL("Non-numeric argument provided: %s\n", optarg); return -1; } cfg.af_filter.max = strtof(optarg, NULL); break; case 'B': if (! isdigit(optarg[0])) { LOG_FATAL("Non-numeric argument provided: %s\n", optarg); return -1; } cfg.sb_filter.thresh = atoi(optarg); break; case 'b': cfg.sb_filter.mtc_type = mtc_str_to_type(optarg); if (-1 == cfg.sb_filter.mtc_type) { LOG_FATAL("Unknown multiple testing correction type '%s' for strandbias filtering\n", optarg); return -1; } break; case 'c': if (! isdigit(optarg[0])) { LOG_FATAL("Non-numeric argument provided: %s\n", optarg); return -1; } cfg.sb_filter.alpha = strtof(optarg, NULL); break; case 'Q': if (! isdigit(optarg[0])) { LOG_FATAL("Non-numeric argument provided: %s\n", optarg); return -1; } cfg.snvqual_filter.thresh = atoi(optarg); break; case 'q': cfg.snvqual_filter.mtc_type = mtc_str_to_type(optarg); if (-1 == cfg.snvqual_filter.mtc_type) { LOG_FATAL("Unknown multiple testing correction type '%s' for snv quality filtering\n", optarg); return -1; } break; case 'r': if (! isdigit(optarg[0])) { LOG_FATAL("Non-numeric argument provided: %s\n", optarg); return -1; } cfg.snvqual_filter.alpha = strtof(optarg, NULL); break; case 's': if (! isdigit(optarg[0])) { LOG_FATAL("Non-numeric argument provided: %s\n", optarg); return -1; } cfg.snvqual_filter.ntests = atol(optarg); break; case 'K': if (! isdigit(optarg[0])) { LOG_FATAL("Non-numeric argument provided: %s\n", optarg); return -1; } cfg.indelqual_filter.thresh = atoi(optarg); break; case 'k': cfg.indelqual_filter.mtc_type = mtc_str_to_type(optarg); if (-1 == cfg.indelqual_filter.mtc_type) { LOG_FATAL("Unknown multiple testing correction type '%s' for snv quality filtering\n", optarg); return -1; } break; case 'l': if (! isdigit(optarg[0])) { LOG_FATAL("Non-numeric argument provided: %s\n", optarg); return -1; } cfg.indelqual_filter.alpha = strtof(optarg, NULL); break; case 'm': if (! isdigit(optarg[0])) { LOG_FATAL("Non-numeric argument provided: %s\n", optarg); return -1; } cfg.indelqual_filter.ntests = atol(optarg); break; case '?': LOG_FATAL("%s\n", "Unrecognized argument found. Exiting...\n"); return 1; default: break; } } cfg.print_only_passed = print_only_passed; cfg.only_indels = only_indels; cfg.only_snvs = only_snvs; cfg.sb_filter.no_compound = sb_filter_no_compound; cfg.sb_filter.incl_indels = sb_filter_incl_indels; if (cfg.only_indels && cfg.only_snvs) { LOG_FATAL("%s\n", "Can't keep only indels and only snvs"); return 1; } if (! no_defaults) { if (cfg.sb_filter.mtc_type==MTC_NONE && ! cfg.sb_filter.thresh) { LOG_VERBOSE("%s\n", "Setting default SB filtering method to FDR"); cfg.sb_filter.mtc_type = MTC_FDR; cfg.sb_filter.alpha = 0.001; } if (cfg.dp_filter.min<0) { cfg.dp_filter.min = 10; LOG_VERBOSE("Setting default minimum coverage to %d\n", cfg.dp_filter.min); } } else { LOG_VERBOSE("%s\n", "Skipping default settings"); } if (0 != argc - optind - 1) {/* FIXME needed at all? */ LOG_FATAL("%s\n", "Unrecognized argument found. Exiting...\n"); return 1; } /* logic check of command line parameters */ if (cfg.dp_filter.max > 0 && cfg.dp_filter.max < cfg.dp_filter.min) { LOG_FATAL("%s\n", "Invalid coverage-filter settings"); return 1; } if ((cfg.af_filter.max > 0 && cfg.af_filter.max < cfg.af_filter.min) || (cfg.af_filter.max > 1.0)) { LOG_FATAL("%s\n", "Invalid AF-filter settings"); return 1; } if (cfg.sb_filter.thresh && cfg.sb_filter.mtc_type != MTC_NONE) { LOG_FATAL("%s\n", "Can't use fixed strand-bias threshold *and* multiple testing correction."); return 1; } if (cfg.snvqual_filter.thresh && cfg.snvqual_filter.mtc_type != MTC_NONE) { LOG_FATAL("%s\n", "Can't use fixed SNV quality threshold *and* multiple testing correction."); return 1; } if (cfg.indelqual_filter.thresh && cfg.indelqual_filter.mtc_type != MTC_NONE) { LOG_FATAL("%s\n", "Can't use fixed indel quality threshold *and* multiple testing correction."); return 1; } if (argc == 2) { fprintf(stderr, "\n"); usage(& cfg); return 1; } if (debug) { dump_filter_conf(& cfg); } /* missing file args default to stdin and stdout */ /* no streaming allowed for vcf_in: we need to determine thresholds first */ if (! vcf_in) { LOG_FATAL("%s\n", "Input VCF missing. No streaming allowed. Need to determine auto threshold in memory friendly manner first."); return 1; } if (! vcf_out) { vcf_out = malloc(2 * sizeof(char)); strcpy(vcf_out, "-"); } LOG_DEBUG("vcf_in=%s vcf_out=%s\n", vcf_in, vcf_out); /* First pass parsing to get qualities for MTC computation (if needed) */ if (cfg.sb_filter.mtc_type != MTC_NONE || cfg.snvqual_filter.mtc_type != MTC_NONE || cfg.indelqual_filter.mtc_type != MTC_NONE) { #ifdef TRACE long int i = 0; #endif LOG_VERBOSE("%s\n", "At least one type of multiple testing correction requested. Doing first pass of vcf"); if ((num_vars = mtc_quals_from_vcf_file(& mtc_quals, vcf_in)) < 0) { LOG_ERROR("Couldn't parse %s\n", vcf_in); return 1; } if (cfg.sb_filter.mtc_type != MTC_NONE) { if (apply_sb_filter_mtc(mtc_quals, & cfg.sb_filter, num_vars)) { LOG_FATAL("%s\n", "Multiple testing correction on strand-bias pvalues failed"); return -1; } } if (cfg.indelqual_filter.mtc_type != MTC_NONE) { if (apply_indelqual_filter_mtc(mtc_quals, & cfg.indelqual_filter, num_vars)) { LOG_FATAL("%s\n", "Multiple testing correction on indel quality pvalues failed"); return -1; } } if (cfg.snvqual_filter.mtc_type != MTC_NONE) { if (apply_snvqual_filter_mtc(mtc_quals, & cfg.snvqual_filter, num_vars)) { LOG_FATAL("%s\n", "Multiple testing correction on SNV quality pvalues failed"); return -1; } } #ifdef TRACE for (i=0; i<num_vars; i++) { LOG_WARN("mtc_quals #%ld sb_qual=%d var_qual=%d is_indel=%d\n", i, mtc_quals[i].sb_qual, mtc_quals[i].var_qual, mtc_quals[i].is_indel); } #endif LOG_VERBOSE("%s\n", "MTC application completed"); } else { LOG_VERBOSE("%s\n", "No multiple testing correction requested. First pass of vcf skipped"); } if (vcf_file_open(& cfg.vcf_in, vcf_in, HAS_GZIP_EXT(vcf_in), 'r')) { LOG_ERROR("Couldn't open %s\n", vcf_in); return 1; } if (vcf_file_open(& cfg.vcf_out, vcf_out, HAS_GZIP_EXT(vcf_out), 'w')) { LOG_ERROR("Couldn't open %s\n", vcf_out); return 1; } free(vcf_in); free(vcf_out); /* print header */ if (0 != vcf_parse_header(&vcf_header, & cfg.vcf_in)) { /* LOG_WARN("%s\n", "vcf_parse_header() failed"); */ if (vcf_file_seek(& cfg.vcf_in, 0, SEEK_SET)) { LOG_FATAL("%s\n", "Couldn't rewind file to parse variants" " after header parsing failed"); return -1; } } /* also sets filter names */ cfg_filter_to_vcf_header(& cfg, &vcf_header); vcf_write_header(& cfg.vcf_out, vcf_header); free(vcf_header); /* read in variants */ while (1) { var_t *var; int rc; int is_indel = 0; vcf_new_var(&var); rc = vcf_parse_var(& cfg.vcf_in, var); if (rc) { /* how to distinguish between error and EOF? */ break; } var_idx += 1; is_indel = vcf_var_is_indel(var); if (cfg.only_snvs && is_indel) { vcf_free_var(&var); continue; } else if (cfg.only_indels && ! is_indel) { vcf_free_var(&var); continue; } /* filters applying to all types of variants */ apply_af_filter(var, & cfg.af_filter); apply_dp_filter(var, & cfg.dp_filter); /* quality threshold per variant type */ if (! is_indel) { if (cfg.snvqual_filter.thresh) { assert(cfg.snvqual_filter.mtc_type == MTC_NONE); apply_snvqual_threshold(var, & cfg.snvqual_filter); } else if (cfg.snvqual_filter.mtc_type != MTC_NONE) { if (mtc_quals[var_idx].var_qual != -1) { vcf_var_add_to_filter(var, cfg.snvqual_filter.id); } } } else { if (cfg.indelqual_filter.thresh) { assert(cfg.indelqual_filter.mtc_type == MTC_NONE); apply_indelqual_threshold(var, & cfg.indelqual_filter); } else if (cfg.indelqual_filter.mtc_type != MTC_NONE) { if (mtc_quals[var_idx].var_qual != -1) { vcf_var_add_to_filter(var, cfg.indelqual_filter.id); } } } /* sb filter */ if (cfg.sb_filter.thresh) { if (! is_indel || cfg.sb_filter.incl_indels) { assert(cfg.sb_filter.mtc_type == MTC_NONE); apply_sb_threshold(var, & cfg.sb_filter); } } else if (cfg.sb_filter.mtc_type != MTC_NONE) { if (! is_indel || cfg.sb_filter.incl_indels) { if (mtc_quals[var_idx].sb_qual == -1) { vcf_var_add_to_filter(var, cfg.sb_filter.id); } } } /* output */ if (cfg.print_only_passed && ! (VCF_VAR_PASSES(var))) { vcf_free_var(&var); continue; } /* add pass if no filters were set */ if (! var->filter || strlen(var->filter)<=1) { char pass_str[] = "PASS"; if (var->filter) { free(var->filter); } var->filter = strdup(pass_str); } vcf_write_var(& cfg.vcf_out, var); vcf_free_var(&var); if (var_idx%1000==0) { (void) vcf_file_flush(& cfg.vcf_out); } } vcf_file_close(& cfg.vcf_in); vcf_file_close(& cfg.vcf_out); free(mtc_quals); LOG_VERBOSE("%s\n", "Successful exit."); return 0; }
int main_filter(int argc, char *argv[]) { filter_conf_t cfg; char *vcf_in = NULL, *vcf_out = NULL; static int print_only_passed = 1; static int sb_filter_no_compound = 0; static int sb_filter_incl_indels = 0; static int only_indels = 0; static int only_snvs = 0; char *vcf_header = NULL; var_t **vars = NULL; long int num_vars = 0; /* isn't long overkill here ? */ long int vars_size = 0; /* keeping track of how much memory we've got pre-allocated */ long int i; static int no_defaults = 0; /* default filter options */ memset(&cfg, 0, sizeof(filter_conf_t)); cfg.dp_filter.min = cfg.dp_filter.max = -1; cfg.af_filter.min = cfg.af_filter.max = -1; cfg.sb_filter.alpha = DEFAULT_SIG; cfg.snvqual_filter.alpha = DEFAULT_SIG; cfg.indelqual_filter.alpha = DEFAULT_SIG; /* keep in sync with long_opts_str and usage * * getopt is a pain in the whole when it comes to syncing of long * and short args and usage. check out gopt, libcfu... */ while (1) { int c; static struct option long_opts[] = { /* see usage sync */ {"verbose", no_argument, &verbose, 1}, {"debug", no_argument, &debug, 1}, {"print-all", no_argument, &print_only_passed, 0}, {"no-defaults", no_argument, &no_defaults, 1}, {"only-indels", no_argument, &only_indels, 1}, {"only-snvs", no_argument, &only_snvs, 1}, {"help", no_argument, NULL, 'h'}, {"in", required_argument, NULL, 'i'}, {"out", required_argument, NULL, 'o'}, {"cov-min", required_argument, NULL, 'v'}, {"cov-max", required_argument, NULL, 'V'}, {"af-min", required_argument, NULL, 'a'}, {"af-max", required_argument, NULL, 'A'}, {"sb-thresh", required_argument, NULL, 'B'}, {"sb-mtc", required_argument, NULL, 'b'}, {"sb-alpha", required_argument, NULL, 'c'}, {"sb-no-compound", no_argument, &sb_filter_no_compound, 1}, {"sb-incl-indels", no_argument, &sb_filter_incl_indels, 1}, {"snvqual-thresh", required_argument, NULL, 'Q'}, {"snvqual-mtc", required_argument, NULL, 'q'}, {"snvqual-alpha", required_argument, NULL, 'r'}, {"snvqual-ntests", required_argument, NULL, 's'}, {"indelqual-thresh", required_argument, NULL, 'K'}, {"indelqual-mtc", required_argument, NULL, 'k'}, {"indelqual-alpha", required_argument, NULL, 'l'}, {"indelqual-ntests", required_argument, NULL, 'm'}, {0, 0, 0, 0} /* sentinel */ }; /* keep in sync with long_opts and usage */ static const char *long_opts_str = "hi:o:v:V:a:A:B:b:c:Q:q:r:s:K:k:l:m:"; /* getopt_long stores the option index here. */ int long_opts_index = 0; c = getopt_long(argc-1, argv+1, /* skipping 'lofreq', just leaving 'command', i.e. call */ long_opts_str, long_opts, & long_opts_index); if (c == -1) { break; } switch (c) { /* keep in sync with long_opts etc */ case 'h': usage(& cfg); return 0; case 'i': vcf_in = strdup(optarg); break; case 'o': if (0 != strcmp(optarg, "-")) { if (file_exists(optarg)) { LOG_FATAL("Cowardly refusing to overwrite file '%s'. Exiting...\n", optarg); return 1; } } vcf_out = strdup(optarg); break; case 'v': if (! isdigit(optarg[0])) { LOG_FATAL("Non-numeric argument provided: %s\n", optarg); return -1; } cfg.dp_filter.min = atoi(optarg); break; case 'V': if (! isdigit(optarg[0])) { LOG_FATAL("Non-numeric argument provided: %s\n", optarg); return -1; } cfg.dp_filter.max = atoi(optarg); break; case 'a': if (! isdigit(optarg[0])) { LOG_FATAL("Non-numeric argument provided: %s\n", optarg); return -1; } cfg.af_filter.min = strtof(optarg, NULL); break; case 'A': if (! isdigit(optarg[0])) { LOG_FATAL("Non-numeric argument provided: %s\n", optarg); return -1; } cfg.af_filter.max = strtof(optarg, NULL); break; case 'B': if (! isdigit(optarg[0])) { LOG_FATAL("Non-numeric argument provided: %s\n", optarg); return -1; } cfg.sb_filter.thresh = atoi(optarg); break; case 'b': cfg.sb_filter.mtc_type = mtc_str_to_type(optarg); if (-1 == cfg.sb_filter.mtc_type) { LOG_FATAL("Unknown multiple testing correction type '%s' for strandbias filtering\n", optarg); return -1; } break; case 'c': if (! isdigit(optarg[0])) { LOG_FATAL("Non-numeric argument provided: %s\n", optarg); return -1; } cfg.sb_filter.alpha = strtof(optarg, NULL); break; case 'Q': if (! isdigit(optarg[0])) { LOG_FATAL("Non-numeric argument provided: %s\n", optarg); return -1; } cfg.snvqual_filter.thresh = atoi(optarg); break; case 'q': cfg.snvqual_filter.mtc_type = mtc_str_to_type(optarg); if (-1 == cfg.snvqual_filter.mtc_type) { LOG_FATAL("Unknown multiple testing correction type '%s' for snv quality filtering\n", optarg); return -1; } break; case 'r': if (! isdigit(optarg[0])) { LOG_FATAL("Non-numeric argument provided: %s\n", optarg); return -1; } cfg.snvqual_filter.alpha = strtof(optarg, NULL); break; case 's': if (! isdigit(optarg[0])) { LOG_FATAL("Non-numeric argument provided: %s\n", optarg); return -1; } cfg.snvqual_filter.ntests = atol(optarg); break; case 'K': if (! isdigit(optarg[0])) { LOG_FATAL("Non-numeric argument provided: %s\n", optarg); return -1; } cfg.indelqual_filter.thresh = atoi(optarg); break; case 'k': cfg.indelqual_filter.mtc_type = mtc_str_to_type(optarg); if (-1 == cfg.indelqual_filter.mtc_type) { LOG_FATAL("Unknown multiple testing correction type '%s' for snv quality filtering\n", optarg); return -1; } break; case 'l': if (! isdigit(optarg[0])) { LOG_FATAL("Non-numeric argument provided: %s\n", optarg); return -1; } cfg.indelqual_filter.alpha = strtof(optarg, NULL); break; case 'm': if (! isdigit(optarg[0])) { LOG_FATAL("Non-numeric argument provided: %s\n", optarg); return -1; } cfg.indelqual_filter.ntests = atol(optarg); break; case '?': LOG_FATAL("%s\n", "Unrecognized argument found. Exiting...\n"); return 1; default: break; } } cfg.print_only_passed = print_only_passed; cfg.only_indels = only_indels; cfg.only_snvs = only_snvs; cfg.sb_filter.no_compound = sb_filter_no_compound; cfg.sb_filter.incl_indels = sb_filter_incl_indels; if (cfg.only_indels && cfg.only_snvs) { LOG_FATAL("%s\n", "Can't keep only indels and only snvs"); return 1; } if (! no_defaults) { if (cfg.sb_filter.mtc_type==MTC_NONE && ! cfg.sb_filter.thresh) { LOG_VERBOSE("%s\n", "Setting default SB filtering method to FDR"); cfg.sb_filter.mtc_type = MTC_FDR; cfg.sb_filter.alpha = 0.001; } if (cfg.dp_filter.min<0) { cfg.dp_filter.min = 10; LOG_VERBOSE("Setting default minimum coverage to %d\n", cfg.dp_filter.min); } } else { LOG_VERBOSE("%s\n", "Skipping default settings"); } if (0 != argc - optind - 1) {/* FIXME needed at all? */ LOG_FATAL("%s\n", "Unrecognized argument found. Exiting...\n"); return 1; } /* logic check of command line parameters */ if (cfg.dp_filter.max > 0 && cfg.dp_filter.max < cfg.dp_filter.min) { LOG_FATAL("%s\n", "Invalid coverage-filter settings"); return 1; } if ((cfg.af_filter.max > 0 && cfg.af_filter.max < cfg.af_filter.min) || (cfg.af_filter.max > 1.0)) { LOG_FATAL("%s\n", "Invalid AF-filter settings"); return 1; } if (cfg.sb_filter.thresh && cfg.sb_filter.mtc_type != MTC_NONE) { LOG_FATAL("%s\n", "Can't use fixed strand-bias threshold *and* multiple testing correction."); return 1; } if (cfg.snvqual_filter.thresh && cfg.snvqual_filter.mtc_type != MTC_NONE) { LOG_FATAL("%s\n", "Can't use fixed SNV quality threshold *and* multiple testing correction."); return 1; } if (cfg.indelqual_filter.thresh && cfg.indelqual_filter.mtc_type != MTC_NONE) { LOG_FATAL("%s\n", "Can't use fixed indel quality threshold *and* multiple testing correction."); return 1; } if (argc == 2) { fprintf(stderr, "\n"); usage(& cfg); return 1; } if (debug) { dump_filter_conf(& cfg); } /* missing file args default to stdin and stdout */ if (! vcf_in) { vcf_in = malloc(2 * sizeof(char)); strcpy(vcf_in, "-"); } if (! vcf_out) { vcf_out = malloc(2 * sizeof(char)); strcpy(vcf_out, "-"); } LOG_DEBUG("vcf_in=%s vcf_out=%s\n", vcf_in, vcf_out); /* open vcf files */ if (vcf_file_open(& cfg.vcf_in, vcf_in, HAS_GZIP_EXT(vcf_in), 'r')) { LOG_ERROR("Couldn't open %s\n", vcf_in); return 1; } if (vcf_file_open(& cfg.vcf_out, vcf_out, HAS_GZIP_EXT(vcf_out), 'w')) { LOG_ERROR("Couldn't open %s\n", vcf_out); return 1; } free(vcf_in); free(vcf_out); /* FIXME everything below here should go into a function with args: - cfg - ...what else? */ /* print header */ if (0 != vcf_parse_header(&vcf_header, & cfg.vcf_in)) { /* LOG_WARN("%s\n", "vcf_parse_header() failed"); */ if (vcf_file_seek(& cfg.vcf_in, 0, SEEK_SET)) { LOG_FATAL("%s\n", "Couldn't rewind file to parse variants" " after header parsing failed"); return -1; } } /* also sets filter names */ cfg_filter_to_vcf_header(& cfg, &vcf_header); vcf_write_header(& cfg.vcf_out, vcf_header); free(vcf_header); /* read in variants. since many filters perform multiple testing * correction and therefore need to look at all variants we keep * it simple and load them all into memory. * * in theory we could apply all 'simple' filters directly within * the loop here and depending on the result spit the variant out * or not. only complex filters need to see all variants first to, * e.g. apply multiple testing. */ num_vars = 0; while (1) { var_t *var; int rc; int is_indel = 0; vcf_new_var(&var); rc = vcf_parse_var(& cfg.vcf_in, var); if (rc) { /* how to distinguish between error and EOF? */ free(var); break; } is_indel = vcf_var_is_indel(var); if (cfg.only_snvs && is_indel) { free(var); continue; } else if (cfg.only_indels && ! is_indel) { free(var); continue; } /* read all in, no matter if already filtered. we keep adding filters */ num_vars +=1; if (num_vars >= vars_size) { const long incr = 128; vars = realloc(vars, (vars_size+incr) * sizeof(var_t*)); vars_size += incr; } vars[num_vars-1] = var; #ifdef TRACE { char *key; vcf_var_key(&key, vars[num_vars-1]); fprintf(stderr, "storing var %ld+1: %s\n", num_vars, key); free(key); } #endif /* filters applying to all types of variants */ apply_af_filter(var, & cfg.af_filter); apply_dp_filter(var, & cfg.dp_filter); /* quality threshold per variant type */ if (! is_indel) { if (cfg.snvqual_filter.thresh) { assert(cfg.snvqual_filter.mtc_type == MTC_NONE); apply_snvqual_threshold(var, & cfg.snvqual_filter); } } else { if (cfg.indelqual_filter.thresh) { assert(cfg.indelqual_filter.mtc_type == MTC_NONE); apply_indelqual_threshold(var, & cfg.indelqual_filter); } } if (cfg.sb_filter.thresh) { if (! is_indel || cfg.sb_filter.incl_indels) { assert(cfg.sb_filter.mtc_type == MTC_NONE); apply_sb_threshold(var, & cfg.sb_filter); } } } if (num_vars) { vars = realloc(vars, (num_vars * sizeof(var_t*))); } vcf_file_close(& cfg.vcf_in); LOG_VERBOSE("Parsed %ld variants\n", num_vars); if (cfg.sb_filter.mtc_type != MTC_NONE) { if (apply_sb_filter_mtc(& cfg.sb_filter, vars, num_vars)) { LOG_FATAL("%s\n", "Multiple testing correction on strand-bias pvalues failed"); return -1; } } if (cfg.snvqual_filter.mtc_type != MTC_NONE) { if (apply_snvqual_filter_mtc(& cfg.snvqual_filter, vars, num_vars)) { LOG_FATAL("%s\n", "Multiple testing correction on SNV qualities failed"); return -1; } } if (cfg.indelqual_filter.mtc_type != MTC_NONE) { if (apply_indelqual_filter_mtc(& cfg.indelqual_filter, vars, num_vars)) { LOG_FATAL("%s\n", "Multiple testing correction on Indel qualities failed"); return -1; } } /* output */ for (i=0; i<num_vars; i++) { var_t *v = vars[i]; if (cfg.print_only_passed && ! (VCF_VAR_PASSES(v))) { continue; } /* add pass if no filters were set */ if (! v->filter || strlen(v->filter)<=1) { char pass_str[] = "PASS"; if (v->filter) { free(v->filter); } v->filter = strdup(pass_str); } vcf_write_var(& cfg.vcf_out, v); } vcf_file_close(& cfg.vcf_out); for (i=0; i<num_vars; i++) { vcf_free_var(& vars[i]); } free(vars); LOG_VERBOSE("%s\n", "Successful exit."); return 0; }