/* Adds the AF filter ids to var if its allele frequency (INFO key "AF")
 * lies outside the range configured in af_filter.
 *
 * var:       variant to test; its filter field may be extended
 * af_filter: bounds (min/max) and the filter ids to add (id_min/id_max).
 *            A bound that is not > 0 is treated as unset.
 *
 * Once the AF tag was found missing, or an AF value could not be
 * converted, the file-level flag af_missing_warning_printed is set and
 * every later call returns immediately, i.e. AF filtering is disabled
 * for the rest of the run.
 */
void apply_af_filter(var_t *var, af_filter_t *af_filter)
{
    char *af_char = NULL;
    float af;

    /* flag doubles as "AF filtering disabled" switch */
    if (af_missing_warning_printed) {
        return;
    }
    /* nothing to do if neither bound was requested */
    if (! (af_filter->min > 0 || af_filter->max > 0)) {
        return;
    }

    if ( ! vcf_var_has_info_key(&af_char, var, "AF")) {
        LOG_WARN("%s\n", "Requested AF filtering failed since AF tag is missing in variant");
        af_missing_warning_printed = 1;
        return;
    }

    /* strtof only sets errno on failure, so clear it first; otherwise a
     * stale ERANGE from an unrelated call would disable AF filtering */
    errno = 0;
    af = strtof(af_char, (char **)NULL); /* atof */
    if (errno==ERANGE) {
        LOG_ERROR("Couldn't parse AF from af_char %s. Disabling AF filtering\n", af_char);
        af_missing_warning_printed = 1;
        free(af_char);
        return;
    }
    free(af_char);

    if (af_filter->min > 0.0 && af < af_filter->min) {
        vcf_var_add_to_filter(var, af_filter->id_min);
    }
    if (af_filter->max > 0.0 && af > af_filter->max) {
        vcf_var_add_to_filter(var, af_filter->id_max);
    }
}
/* Adds sb_filter->id to var if its strand-bias value (INFO key "SB",
 * read with atoi) exceeds the fixed threshold sb_filter->thresh.
 *
 * A threshold of 0 disables the check. Unless sb_filter->no_compound is
 * set, the variant is only filtered if alt_mostly_on_one_strand() also
 * holds. A missing SB tag is reported once (tracked through
 * sb_missing_warning_printed) and the variant is left untouched.
 */
void apply_sb_threshold(var_t *var, sb_filter_t *sb_filter)
{
    char *sb_str = NULL;
    int sb_val;

    if (0 == sb_filter->thresh) {
        return;
    }

    if ( ! vcf_var_has_info_key(&sb_str, var, "SB")) {
        if (0 == sb_missing_warning_printed) {
            LOG_WARN("%s\n", "Requested SB filtering failed since SB tag is missing in variant");
            sb_missing_warning_printed = 1;
        }
        return;
    }

    sb_val = atoi(sb_str);
    free(sb_str);

    if (sb_val <= sb_filter->thresh) {
        return;
    }
    if (sb_filter->no_compound || alt_mostly_on_one_strand(var)) {
        vcf_var_add_to_filter(var, sb_filter->id);
    }
}
/* Adds uniq_filter->id to var if the value returned by
 * uniq_phred_from_var() is below uniq_filter->thresh.
 * A threshold of 0 disables the check (the phred value is then not
 * computed at all).
 */
void apply_uniq_threshold(var_t *var, uniq_filter_t *uniq_filter)
{
    if (uniq_filter->thresh
        && uniq_phred_from_var(var) < uniq_filter->thresh) {
        vcf_var_add_to_filter(var, uniq_filter->id);
    }
}
/* Adds indelqual_filter->id to var if its quality is below the fixed
 * threshold indelqual_filter->thresh.
 *
 * The caller must only pass variants carrying the INDEL info key
 * (asserted). A threshold of 0 disables the check, and variants whose
 * qual is not greater than -1 are never filtered here.
 */
void apply_indelqual_threshold(var_t *var, indelqual_filter_t *indelqual_filter)
{
    assert (vcf_var_has_info_key(NULL, var, "INDEL"));

    if (indelqual_filter->thresh) {
        if (var->qual>-1 && var->qual<indelqual_filter->thresh) {
            vcf_var_add_to_filter(var, indelqual_filter->id);
        }
    }
}
/* Adds the coverage filter ids to var if its depth (INFO key "DP") lies
 * outside the range configured in dp_filter.
 *
 * var:       variant to test; its filter field may be extended
 * dp_filter: bounds (min/max) and the filter ids to add (id_min/id_max).
 *            A bound that is not > 0 is treated as unset.
 *
 * Once the DP tag was found missing, dp_missing_warning_printed is set
 * and every later call returns immediately. A DP value that cannot be
 * converted (or does not fit into an int) terminates the program.
 */
void apply_dp_filter(var_t *var, dp_filter_t *dp_filter)
{
    char *dp_char = NULL;
    long int parsed;
    int cov;

    /* flag doubles as "coverage filtering disabled" switch */
    if (dp_missing_warning_printed) {
        return;
    }
    /* nothing to do if neither bound was requested */
    if (! (dp_filter->min > 0 || dp_filter->max > 0)) {
        return;
    }

    if ( ! vcf_var_has_info_key(&dp_char, var, "DP")) {
#ifdef DEBUG
        vcf_file_t f;
        f.fh = stderr;
        f.gz = 0;
        vcf_write_var(&f, var);
#endif
        LOG_WARN("%s\n", "Requested coverage filtering failed since DP tag is missing in variant");
        dp_missing_warning_printed = 1;
        return;
    }

    errno = 0;
    /*cov = atoi(dp_char);*/
    parsed = strtol(dp_char, (char **) NULL, 10);
    cov = (int) parsed;
    /* the round trip catches values that strtol accepted but that were
     * changed by the narrowing to int */
    if (errno || (long int) cov != parsed) {
        LOG_FATAL("%s\n", "error during int conversion");
        free(dp_char);
        exit(1);
    }
    free(dp_char);

    if (dp_filter->min > 0 && cov < dp_filter->min) {
        vcf_var_add_to_filter(var, dp_filter->id_min);
    }
    if (dp_filter->max > 0 && cov > dp_filter->max) {
        vcf_var_add_to_filter(var, dp_filter->id_max);
    }
}
/* Applies the uniq filter with multiple testing correction to all
 * num_vars variants in vars: every variant that is NOT significant
 * after correction gets uniq_filter->id added to its filter field.
 *
 * uniq_filter: mtc_type selects the correction (MTC_BONF, MTC_HOLMBONF
 *              or MTC_FDR), alpha is the significance level. If ntests
 *              is 0 it is set to num_vars.
 * vars:        array of num_vars variants
 *
 * Returns 0 on success and -1 on an unknown MTC type. Running out of
 * memory terminates the program.
 *
 * FIXME should be part of lofreq filter.
 */
int apply_uniq_filter_mtc(uniq_filter_t *uniq_filter, var_t **vars, const int num_vars)
{
    double *uniq_probs = NULL;
    int i;

    if (uniq_filter->ntests && num_vars > uniq_filter->ntests) {
        LOG_WARN("%s\n", "Number of variants larger than number of predefined tests for uniq filter! Are you sure that makes sense?");
    }
    if (! uniq_filter->ntests) {
        uniq_filter->ntests = num_vars;
    }

    /* nothing to correct or filter; also avoids mistaking a NULL
     * returned by malloc(0) for an out-of-memory condition */
    if (num_vars <= 0) {
        return 0;
    }

    /* collect uniq error probs */
    uniq_probs = malloc(num_vars * sizeof(double));
    if ( ! uniq_probs) {
        LOG_FATAL("%s\n", "out of memory");
        exit(1);
    }
    for (i=0; i<num_vars; i++) {
        uniq_probs[i] = PHREDQUAL_TO_PROB(uniq_phred_from_var(vars[i]));
    }

    /* multiple testing correction */
    if (uniq_filter->mtc_type == MTC_BONF) {
        bonf_corr(uniq_probs, num_vars, uniq_filter->ntests);

    } else if (uniq_filter->mtc_type == MTC_HOLMBONF) {
        holm_bonf_corr(uniq_probs, num_vars, uniq_filter->alpha, uniq_filter->ntests);

    } else if (uniq_filter->mtc_type == MTC_FDR) {
        long int num_rej = 0;
        long int *idx_rej = NULL; /* indices of rejected i.e. significant values */

        num_rej = fdr(uniq_probs, num_vars, uniq_filter->alpha, uniq_filter->ntests, &idx_rej);

        /* first pretend none are significant: without this, values
         * that were not rejected but whose raw probability is below
         * alpha would slip through the final comparison */
        for (i=0; i<num_vars; i++) {
            uniq_probs[i] = DBL_MAX;
        }
        for (i=0; i<num_rej; i++) {
            long int idx = idx_rej[i];
            uniq_probs[idx] = -1;
        }
        free(idx_rej);

    } else {
        LOG_FATAL("Internal error: unknown MTC type %d\n", uniq_filter->mtc_type);
        free(uniq_probs);
        return -1;
    }

    for (i=0; i<num_vars; i++) {
        if (uniq_probs[i] > uniq_filter->alpha) {
            vcf_var_add_to_filter(vars[i], uniq_filter->id);
        }
    }

    free(uniq_probs);
    return 0;
}
int main_filter(int argc, char *argv[]) { filter_conf_t cfg; char *vcf_in = NULL, *vcf_out = NULL; static int print_only_passed = 1; static int sb_filter_no_compound = 0; static int sb_filter_incl_indels = 0; static int only_indels = 0; static int only_snvs = 0; char *vcf_header = NULL; mtc_qual_t *mtc_quals = NULL; long int num_vars; static int no_defaults = 0; long int var_idx = -1; /* default filter options */ memset(&cfg, 0, sizeof(filter_conf_t)); cfg.dp_filter.min = cfg.dp_filter.max = -1; cfg.af_filter.min = cfg.af_filter.max = -1; cfg.sb_filter.alpha = DEFAULT_SIG; cfg.snvqual_filter.alpha = DEFAULT_SIG; cfg.indelqual_filter.alpha = DEFAULT_SIG; /* keep in sync with long_opts_str and usage * * getopt is a pain in the whole when it comes to syncing of long * and short args and usage. check out gopt, libcfu... */ while (1) { int c; static struct option long_opts[] = { /* see usage sync */ {"verbose", no_argument, &verbose, 1}, {"debug", no_argument, &debug, 1}, {"print-all", no_argument, &print_only_passed, 0}, {"no-defaults", no_argument, &no_defaults, 1}, {"only-indels", no_argument, &only_indels, 1}, {"only-snvs", no_argument, &only_snvs, 1}, {"help", no_argument, NULL, 'h'}, {"in", required_argument, NULL, 'i'}, {"out", required_argument, NULL, 'o'}, {"cov-min", required_argument, NULL, 'v'}, {"cov-max", required_argument, NULL, 'V'}, {"af-min", required_argument, NULL, 'a'}, {"af-max", required_argument, NULL, 'A'}, {"sb-thresh", required_argument, NULL, 'B'}, {"sb-mtc", required_argument, NULL, 'b'}, {"sb-alpha", required_argument, NULL, 'c'}, {"sb-no-compound", no_argument, &sb_filter_no_compound, 1}, {"sb-incl-indels", no_argument, &sb_filter_incl_indels, 1}, {"snvqual-thresh", required_argument, NULL, 'Q'}, {"snvqual-mtc", required_argument, NULL, 'q'}, {"snvqual-alpha", required_argument, NULL, 'r'}, {"snvqual-ntests", required_argument, NULL, 's'}, {"indelqual-thresh", required_argument, NULL, 'K'}, {"indelqual-mtc", required_argument, NULL, 'k'}, 
{"indelqual-alpha", required_argument, NULL, 'l'}, {"indelqual-ntests", required_argument, NULL, 'm'}, {0, 0, 0, 0} /* sentinel */ }; /* keep in sync with long_opts and usage */ static const char *long_opts_str = "hi:o:v:V:a:A:B:b:c:Q:q:r:s:K:k:l:m:"; /* getopt_long stores the option index here. */ int long_opts_index = 0; c = getopt_long(argc-1, argv+1, /* skipping 'lofreq', just leaving 'command', i.e. call */ long_opts_str, long_opts, & long_opts_index); if (c == -1) { break; } switch (c) { /* keep in sync with long_opts etc */ case 'h': usage(& cfg); return 0; case 'i': vcf_in = strdup(optarg); break; case 'o': if (0 != strcmp(optarg, "-")) { if (file_exists(optarg)) { LOG_FATAL("Cowardly refusing to overwrite file '%s'. Exiting...\n", optarg); return 1; } } vcf_out = strdup(optarg); break; case 'v': if (! isdigit(optarg[0])) { LOG_FATAL("Non-numeric argument provided: %s\n", optarg); return -1; } cfg.dp_filter.min = atoi(optarg); break; case 'V': if (! isdigit(optarg[0])) { LOG_FATAL("Non-numeric argument provided: %s\n", optarg); return -1; } cfg.dp_filter.max = atoi(optarg); break; case 'a': if (! isdigit(optarg[0])) { LOG_FATAL("Non-numeric argument provided: %s\n", optarg); return -1; } cfg.af_filter.min = strtof(optarg, NULL); break; case 'A': if (! isdigit(optarg[0])) { LOG_FATAL("Non-numeric argument provided: %s\n", optarg); return -1; } cfg.af_filter.max = strtof(optarg, NULL); break; case 'B': if (! isdigit(optarg[0])) { LOG_FATAL("Non-numeric argument provided: %s\n", optarg); return -1; } cfg.sb_filter.thresh = atoi(optarg); break; case 'b': cfg.sb_filter.mtc_type = mtc_str_to_type(optarg); if (-1 == cfg.sb_filter.mtc_type) { LOG_FATAL("Unknown multiple testing correction type '%s' for strandbias filtering\n", optarg); return -1; } break; case 'c': if (! isdigit(optarg[0])) { LOG_FATAL("Non-numeric argument provided: %s\n", optarg); return -1; } cfg.sb_filter.alpha = strtof(optarg, NULL); break; case 'Q': if (! 
isdigit(optarg[0])) { LOG_FATAL("Non-numeric argument provided: %s\n", optarg); return -1; } cfg.snvqual_filter.thresh = atoi(optarg); break; case 'q': cfg.snvqual_filter.mtc_type = mtc_str_to_type(optarg); if (-1 == cfg.snvqual_filter.mtc_type) { LOG_FATAL("Unknown multiple testing correction type '%s' for snv quality filtering\n", optarg); return -1; } break; case 'r': if (! isdigit(optarg[0])) { LOG_FATAL("Non-numeric argument provided: %s\n", optarg); return -1; } cfg.snvqual_filter.alpha = strtof(optarg, NULL); break; case 's': if (! isdigit(optarg[0])) { LOG_FATAL("Non-numeric argument provided: %s\n", optarg); return -1; } cfg.snvqual_filter.ntests = atol(optarg); break; case 'K': if (! isdigit(optarg[0])) { LOG_FATAL("Non-numeric argument provided: %s\n", optarg); return -1; } cfg.indelqual_filter.thresh = atoi(optarg); break; case 'k': cfg.indelqual_filter.mtc_type = mtc_str_to_type(optarg); if (-1 == cfg.indelqual_filter.mtc_type) { LOG_FATAL("Unknown multiple testing correction type '%s' for snv quality filtering\n", optarg); return -1; } break; case 'l': if (! isdigit(optarg[0])) { LOG_FATAL("Non-numeric argument provided: %s\n", optarg); return -1; } cfg.indelqual_filter.alpha = strtof(optarg, NULL); break; case 'm': if (! isdigit(optarg[0])) { LOG_FATAL("Non-numeric argument provided: %s\n", optarg); return -1; } cfg.indelqual_filter.ntests = atol(optarg); break; case '?': LOG_FATAL("%s\n", "Unrecognized argument found. Exiting...\n"); return 1; default: break; } } cfg.print_only_passed = print_only_passed; cfg.only_indels = only_indels; cfg.only_snvs = only_snvs; cfg.sb_filter.no_compound = sb_filter_no_compound; cfg.sb_filter.incl_indels = sb_filter_incl_indels; if (cfg.only_indels && cfg.only_snvs) { LOG_FATAL("%s\n", "Can't keep only indels and only snvs"); return 1; } if (! no_defaults) { if (cfg.sb_filter.mtc_type==MTC_NONE && ! 
cfg.sb_filter.thresh) { LOG_VERBOSE("%s\n", "Setting default SB filtering method to FDR"); cfg.sb_filter.mtc_type = MTC_FDR; cfg.sb_filter.alpha = 0.001; } if (cfg.dp_filter.min<0) { cfg.dp_filter.min = 10; LOG_VERBOSE("Setting default minimum coverage to %d\n", cfg.dp_filter.min); } } else { LOG_VERBOSE("%s\n", "Skipping default settings"); } if (0 != argc - optind - 1) {/* FIXME needed at all? */ LOG_FATAL("%s\n", "Unrecognized argument found. Exiting...\n"); return 1; } /* logic check of command line parameters */ if (cfg.dp_filter.max > 0 && cfg.dp_filter.max < cfg.dp_filter.min) { LOG_FATAL("%s\n", "Invalid coverage-filter settings"); return 1; } if ((cfg.af_filter.max > 0 && cfg.af_filter.max < cfg.af_filter.min) || (cfg.af_filter.max > 1.0)) { LOG_FATAL("%s\n", "Invalid AF-filter settings"); return 1; } if (cfg.sb_filter.thresh && cfg.sb_filter.mtc_type != MTC_NONE) { LOG_FATAL("%s\n", "Can't use fixed strand-bias threshold *and* multiple testing correction."); return 1; } if (cfg.snvqual_filter.thresh && cfg.snvqual_filter.mtc_type != MTC_NONE) { LOG_FATAL("%s\n", "Can't use fixed SNV quality threshold *and* multiple testing correction."); return 1; } if (cfg.indelqual_filter.thresh && cfg.indelqual_filter.mtc_type != MTC_NONE) { LOG_FATAL("%s\n", "Can't use fixed indel quality threshold *and* multiple testing correction."); return 1; } if (argc == 2) { fprintf(stderr, "\n"); usage(& cfg); return 1; } if (debug) { dump_filter_conf(& cfg); } /* missing file args default to stdin and stdout */ /* no streaming allowed for vcf_in: we need to determine thresholds first */ if (! vcf_in) { LOG_FATAL("%s\n", "Input VCF missing. No streaming allowed. Need to determine auto threshold in memory friendly manner first."); return 1; } if (! 
vcf_out) { vcf_out = malloc(2 * sizeof(char)); strcpy(vcf_out, "-"); } LOG_DEBUG("vcf_in=%s vcf_out=%s\n", vcf_in, vcf_out); /* First pass parsing to get qualities for MTC computation (if needed) */ if (cfg.sb_filter.mtc_type != MTC_NONE || cfg.snvqual_filter.mtc_type != MTC_NONE || cfg.indelqual_filter.mtc_type != MTC_NONE) { #ifdef TRACE long int i = 0; #endif LOG_VERBOSE("%s\n", "At least one type of multiple testing correction requested. Doing first pass of vcf"); if ((num_vars = mtc_quals_from_vcf_file(& mtc_quals, vcf_in)) < 0) { LOG_ERROR("Couldn't parse %s\n", vcf_in); return 1; } if (cfg.sb_filter.mtc_type != MTC_NONE) { if (apply_sb_filter_mtc(mtc_quals, & cfg.sb_filter, num_vars)) { LOG_FATAL("%s\n", "Multiple testing correction on strand-bias pvalues failed"); return -1; } } if (cfg.indelqual_filter.mtc_type != MTC_NONE) { if (apply_indelqual_filter_mtc(mtc_quals, & cfg.indelqual_filter, num_vars)) { LOG_FATAL("%s\n", "Multiple testing correction on indel quality pvalues failed"); return -1; } } if (cfg.snvqual_filter.mtc_type != MTC_NONE) { if (apply_snvqual_filter_mtc(mtc_quals, & cfg.snvqual_filter, num_vars)) { LOG_FATAL("%s\n", "Multiple testing correction on SNV quality pvalues failed"); return -1; } } #ifdef TRACE for (i=0; i<num_vars; i++) { LOG_WARN("mtc_quals #%ld sb_qual=%d var_qual=%d is_indel=%d\n", i, mtc_quals[i].sb_qual, mtc_quals[i].var_qual, mtc_quals[i].is_indel); } #endif LOG_VERBOSE("%s\n", "MTC application completed"); } else { LOG_VERBOSE("%s\n", "No multiple testing correction requested. 
First pass of vcf skipped"); } if (vcf_file_open(& cfg.vcf_in, vcf_in, HAS_GZIP_EXT(vcf_in), 'r')) { LOG_ERROR("Couldn't open %s\n", vcf_in); return 1; } if (vcf_file_open(& cfg.vcf_out, vcf_out, HAS_GZIP_EXT(vcf_out), 'w')) { LOG_ERROR("Couldn't open %s\n", vcf_out); return 1; } free(vcf_in); free(vcf_out); /* print header */ if (0 != vcf_parse_header(&vcf_header, & cfg.vcf_in)) { /* LOG_WARN("%s\n", "vcf_parse_header() failed"); */ if (vcf_file_seek(& cfg.vcf_in, 0, SEEK_SET)) { LOG_FATAL("%s\n", "Couldn't rewind file to parse variants" " after header parsing failed"); return -1; } } /* also sets filter names */ cfg_filter_to_vcf_header(& cfg, &vcf_header); vcf_write_header(& cfg.vcf_out, vcf_header); free(vcf_header); /* read in variants */ while (1) { var_t *var; int rc; int is_indel = 0; vcf_new_var(&var); rc = vcf_parse_var(& cfg.vcf_in, var); if (rc) { /* how to distinguish between error and EOF? */ break; } var_idx += 1; is_indel = vcf_var_is_indel(var); if (cfg.only_snvs && is_indel) { vcf_free_var(&var); continue; } else if (cfg.only_indels && ! is_indel) { vcf_free_var(&var); continue; } /* filters applying to all types of variants */ apply_af_filter(var, & cfg.af_filter); apply_dp_filter(var, & cfg.dp_filter); /* quality threshold per variant type */ if (! is_indel) { if (cfg.snvqual_filter.thresh) { assert(cfg.snvqual_filter.mtc_type == MTC_NONE); apply_snvqual_threshold(var, & cfg.snvqual_filter); } else if (cfg.snvqual_filter.mtc_type != MTC_NONE) { if (mtc_quals[var_idx].var_qual != -1) { vcf_var_add_to_filter(var, cfg.snvqual_filter.id); } } } else { if (cfg.indelqual_filter.thresh) { assert(cfg.indelqual_filter.mtc_type == MTC_NONE); apply_indelqual_threshold(var, & cfg.indelqual_filter); } else if (cfg.indelqual_filter.mtc_type != MTC_NONE) { if (mtc_quals[var_idx].var_qual != -1) { vcf_var_add_to_filter(var, cfg.indelqual_filter.id); } } } /* sb filter */ if (cfg.sb_filter.thresh) { if (! 
is_indel || cfg.sb_filter.incl_indels) { assert(cfg.sb_filter.mtc_type == MTC_NONE); apply_sb_threshold(var, & cfg.sb_filter); } } else if (cfg.sb_filter.mtc_type != MTC_NONE) { if (! is_indel || cfg.sb_filter.incl_indels) { if (mtc_quals[var_idx].sb_qual == -1) { vcf_var_add_to_filter(var, cfg.sb_filter.id); } } } /* output */ if (cfg.print_only_passed && ! (VCF_VAR_PASSES(var))) { vcf_free_var(&var); continue; } /* add pass if no filters were set */ if (! var->filter || strlen(var->filter)<=1) { char pass_str[] = "PASS"; if (var->filter) { free(var->filter); } var->filter = strdup(pass_str); } vcf_write_var(& cfg.vcf_out, var); vcf_free_var(&var); if (var_idx%1000==0) { (void) vcf_file_flush(& cfg.vcf_out); } } vcf_file_close(& cfg.vcf_in); vcf_file_close(& cfg.vcf_out); free(mtc_quals); LOG_VERBOSE("%s\n", "Successful exit."); return 0; }
/* Applies the strand-bias filter with multiple testing correction:
 * every considered variant that IS significant after correction gets
 * sb_filter->id added to its filter field (unless no_compound is unset
 * and alt_mostly_on_one_strand() does not hold for it).
 *
 * Variants without an SB info key are ignored (warned about once), and
 * so are indels unless sb_filter->incl_indels is set.
 *
 * sb_filter: mtc_type selects the correction (MTC_BONF, MTC_HOLMBONF or
 *            MTC_FDR), alpha is the significance level. If ntests is 0
 *            it is set to the number of considered variants.
 * vars:      array of num_vars variants
 *
 * Returns 0 on success and -1 on error (out of memory, unknown MTC
 * type).
 *
 * very similar to apply_snvqual_filter_mtc, but reverse logic and
 * looking at all vars
 */
int apply_sb_filter_mtc(sb_filter_t *sb_filter, var_t **vars, const long int num_vars)
{
    double *sb_probs = NULL;
    long int *orig_idx = NULL;/* we might ignore some variants (missing values etc). keep track of real indices of kept vars */
    long int num_ign = 0;
    long int num_kept = 0;
    long int i;
    void *shrunk = NULL;
    int rc = -1;

    /* nothing to do; also avoids mistaking a NULL returned by
     * malloc(0) for an out-of-memory condition */
    if (num_vars <= 0) {
        return 0;
    }

    /* collect values from vars kept in mem */
    sb_probs = malloc(num_vars * sizeof(double));
    if ( ! sb_probs) {
        LOG_FATAL("%s\n", "out of memory");
        goto cleanup;
    }
    orig_idx = malloc(num_vars * sizeof(long int));
    if ( ! orig_idx) {
        LOG_FATAL("%s\n", "out of memory");
        goto cleanup;
    }

    for (i=0; i<num_vars; i++) {
        char *sb_char = NULL;

        /* ignore indels too if sb filter is not to be applied */
        if (! sb_filter->incl_indels && vcf_var_is_indel(vars[i])) {
            num_ign += 1;
            continue;
        }

        if ( ! vcf_var_has_info_key(&sb_char, vars[i], "SB")) {
            if ( ! sb_missing_warning_printed) {
                LOG_WARN("%s\n", "At least one variant has no SB tag! SB filtering will be incomplete");
                sb_missing_warning_printed = 1;
            }
            num_ign += 1;
            continue;
        }

        /* kept values are stored densely; orig_idx maps back into vars */
        sb_probs[i-num_ign] = PHREDQUAL_TO_PROB(atoi(sb_char));
        orig_idx[i-num_ign] = i;
        free(sb_char);
    }

    num_kept = num_vars - num_ign;
    if (num_kept <= 0) {
        rc = 0;
        goto cleanup;
    }

    /* realloc to smaller size apparently not guaranteed to free up
     * space so no point really but let's make sure we don't use that
     * memory. The result goes through a temporary so that the original
     * block can still be freed should realloc fail. */
    shrunk = realloc(sb_probs, num_kept * sizeof(double));
    if (! shrunk) {
        LOG_FATAL("%s\n", "realloc failed. Exiting...");
        goto cleanup;
    }
    sb_probs = shrunk;

    shrunk = realloc(orig_idx, num_kept * sizeof(long int));
    if (! shrunk) {
        LOG_FATAL("%s\n", "realloc failed. Exiting...");
        goto cleanup;
    }
    orig_idx = shrunk;

    if (! sb_filter->ntests) {
        sb_filter->ntests = num_kept;
    } else if (num_kept > sb_filter->ntests) {
        LOG_WARN("%s\n", "Number of variants larger than number of predefined tests for SB filter! Are you sure that makes sense?");
    }

    /* multiple testing correction */
    if (sb_filter->mtc_type == MTC_BONF) {
        bonf_corr(sb_probs, num_kept, sb_filter->ntests);

    } else if (sb_filter->mtc_type == MTC_HOLMBONF) {
        holm_bonf_corr(sb_probs, num_kept, sb_filter->alpha, sb_filter->ntests);

    } else if (sb_filter->mtc_type == MTC_FDR) {
        long int num_rej = 0;
        long int *idx_rej = NULL; /* indices of rejected i.e. significant values */

        num_rej = fdr(sb_probs, num_kept, sb_filter->alpha, sb_filter->ntests, &idx_rej);

        /* first pretend none are significant */
        for (i=0; i<num_kept; i++) {
            sb_probs[i] = DBL_MAX;
        }
        LOG_DEBUG("%ld results significant after fdr\n", num_rej);
        /* then mark the rejected ones so that they compare < alpha */
        for (i=0; i<num_rej; i++) {
            long int idx = idx_rej[i];
            sb_probs[idx] = -1;
        }
        free(idx_rej);

    } else {
        LOG_FATAL("Internal error: unknown MTC type %d\n", sb_filter->mtc_type);
        goto cleanup;
    }

    for (i=0; i<num_kept; i++) {
        if (sb_probs[i] < sb_filter->alpha) {
            if (sb_filter->no_compound || alt_mostly_on_one_strand(vars[orig_idx[i]])) {
                vcf_var_add_to_filter(vars[orig_idx[i]], sb_filter->id);
            }
        }
    }
    rc = 0;

cleanup:
    free(orig_idx);
    free(sb_probs);
    return rc;
}
/* Applies the indel quality filter with multiple testing correction:
 * every considered variant that is NOT significant after correction
 * gets indelqual_filter->id added to its filter field.
 *
 * Only variants with qual > -1 that carry the INDEL info key are
 * considered.
 *
 * indelqual_filter: mtc_type selects the correction (MTC_BONF,
 *                   MTC_HOLMBONF or MTC_FDR), alpha is the significance
 *                   level. If ntests is 0 it is set to the number of
 *                   considered variants.
 * vars:             array of num_vars variants
 *
 * Returns 0 on success and -1 on error (out of memory, unknown MTC
 * type).
 *
 * Very similar to apply_sb_filter_mtc, but reverse testing logic and
 * only looking at non consvars
 */
int apply_indelqual_filter_mtc(indelqual_filter_t *indelqual_filter, var_t **vars, const long int num_vars)
{
    /* can only apply this logic to variants that are not consensus
     * variants, i.e those that actually have a quality. therefore
     * keep track of non cons var indices */
    long int *orig_idx = NULL; /* of size num_noncons_vars */
    double *noncons_errprobs = NULL;
    long int num_noncons_vars = 0;
    long int i;
    void *shrunk = NULL;
    int rc = -1;

    /* FIXME function almost identical to its snv quality counterpart
     * (presumably apply_snvqual_filter_mtc), just a different filter;
     * can be easily merged by accepting both types of variants */

    /* nothing to do; also avoids mistaking a NULL returned by
     * malloc(0) for an out-of-memory condition */
    if (num_vars <= 0) {
        return 0;
    }

    /* collect values from noncons vars only and keep track of their indices */
    orig_idx = malloc(num_vars * sizeof(long int));
    if ( ! orig_idx) {
        LOG_FATAL("%s\n", "out of memory");
        goto cleanup;
    }
    noncons_errprobs = malloc(num_vars * sizeof(double));
    if ( ! noncons_errprobs) {
        LOG_FATAL("%s\n", "out of memory");
        goto cleanup;
    }

    for (i=0; i<num_vars; i++) {
        if (vars[i]->qual>-1 && vcf_var_has_info_key(NULL, vars[i], "INDEL")) {
            noncons_errprobs[num_noncons_vars] = PHREDQUAL_TO_PROB(vars[i]->qual);
            orig_idx[num_noncons_vars] = i;
            num_noncons_vars += 1;
        }
    }
    if (! num_noncons_vars) {
        rc = 0;
        goto cleanup;
    }

    if (indelqual_filter->ntests && num_noncons_vars > indelqual_filter->ntests) {
        LOG_WARN("Number of (non consensus) variants larger than number of predefined tests for indelqual filter (%ld > %ld)! Are you sure that makes sense?\n",
                 num_noncons_vars, indelqual_filter->ntests);
    }

    /* shrink to the used size; the result goes through a temporary so
     * that the original block can still be freed should realloc fail */
    shrunk = realloc(orig_idx, (num_noncons_vars * sizeof(long int)));
    if ( ! shrunk) {
        LOG_FATAL("%s\n", "out of memory");
        goto cleanup;
    }
    orig_idx = shrunk;

    shrunk = realloc(noncons_errprobs, (num_noncons_vars * sizeof(double)));
    if ( ! shrunk) {
        LOG_FATAL("%s\n", "out of memory");
        goto cleanup;
    }
    noncons_errprobs = shrunk;

    /* only now we can set the number of tests (if it wasn't set by
     * caller) */
    if (! indelqual_filter->ntests) {
        indelqual_filter->ntests = num_noncons_vars;
    }

    /* multiple testing correction */
    if (indelqual_filter->mtc_type == MTC_BONF) {
        bonf_corr(noncons_errprobs, num_noncons_vars,
                  indelqual_filter->ntests);

    } else if (indelqual_filter->mtc_type == MTC_HOLMBONF) {
        holm_bonf_corr(noncons_errprobs, num_noncons_vars,
                       indelqual_filter->alpha, indelqual_filter->ntests);

    } else if (indelqual_filter->mtc_type == MTC_FDR) {
        long int num_rej = 0;
        long int *idx_rej = NULL; /* indices of rejected i.e. significant values */

        num_rej = fdr(noncons_errprobs, num_noncons_vars,
                      indelqual_filter->alpha, indelqual_filter->ntests,
                      &idx_rej);

        /* first pretend none are significant */
        for (i=0; i<num_noncons_vars; i++) {
            noncons_errprobs[i] = DBL_MAX;
        }
        LOG_DEBUG("%ld results significant after fdr\n", num_rej);
        /* then mark the rejected ones so that they compare <= alpha */
        for (i=0; i<num_rej; i++) {
            long int idx = idx_rej[i];
            noncons_errprobs[idx] = -1;
        }
        free(idx_rej);

    } else {
        LOG_FATAL("Internal error: unknown MTC type %d\n", indelqual_filter->mtc_type);
        goto cleanup;
    }

    for (i=0; i<num_noncons_vars; i++) {
        if (noncons_errprobs[i] > indelqual_filter->alpha) {
            vcf_var_add_to_filter(vars[orig_idx[i]], indelqual_filter->id);
        }
    }
    rc = 0;

cleanup:
    free(orig_idx);
    free(noncons_errprobs);
    return rc;
}