Example #1
0
/* Marks var with the AF min/max filter ids if the value of its INFO
 * "AF" key lies outside the range configured in af_filter.
 *
 * A bound is only active if it is > 0. If the AF tag is missing or its
 * value is out of range for a float, a message is logged once and AF
 * filtering is disabled for all following calls (via
 * af_missing_warning_printed).
 */
void apply_af_filter(var_t *var, af_filter_t *af_filter)
{
     char *af_char = NULL;
     float af;

     /* a previous failure switched AF filtering off */
     if (af_missing_warning_printed) {
          return;
     }

     /* nothing to do unless at least one bound was requested */
     if ( ! (af_filter->min > 0 || af_filter->max > 0)) {
          return;
     }

     if ( ! vcf_var_has_info_key(&af_char, var, "AF")) {
          LOG_WARN("%s\n", "Requested AF filtering failed since AF tag is missing in variant");
          af_missing_warning_printed = 1;
          return;
     }

     /* strtof only sets errno on failure, so a stale value must be
      * cleared before the call */
     errno = 0;
     af = strtof(af_char, (char **)NULL); /* atof */
     if (errno==ERANGE) {
          LOG_ERROR("Couldn't parse AF from af_char %s. Disabling AF filtering\n", af_char);
          af_missing_warning_printed = 1;
          free(af_char);
          return;
     }
     free(af_char);

     if (af_filter->min > 0.0 && af < af_filter->min) {
          vcf_var_add_to_filter(var, af_filter->id_min);
     }
     if (af_filter->max > 0.0 && af > af_filter->max) {
          vcf_var_add_to_filter(var, af_filter->id_max);
     }
}
Example #2
0
/* Applies the fixed strand-bias threshold to a single variant.
 *
 * Does nothing if no threshold is configured. If the variant carries
 * no "SB" INFO key a warning is logged (only once) and the variant is
 * left untouched. Otherwise the variant is tagged with the filter id
 * if its SB value exceeds the threshold and either compound filtering
 * is switched off or alt_mostly_on_one_strand() holds for it.
 */
void apply_sb_threshold(var_t *var, sb_filter_t *sb_filter)
{
     char *sb_value = NULL;
     int sb_phred;

     if (sb_filter->thresh) {
          if (vcf_var_has_info_key(&sb_value, var, "SB")) {
               sb_phred = atoi(sb_value);
               free(sb_value);

               if (sb_phred > sb_filter->thresh
                   && (sb_filter->no_compound || alt_mostly_on_one_strand(var))) {
                    vcf_var_add_to_filter(var, sb_filter->id);
               }
          } else if ( ! sb_missing_warning_printed) {
               /* warn only once per run */
               LOG_WARN("%s\n", "Requested SB filtering failed since SB tag is missing in variant");
               sb_missing_warning_printed = 1;
          }
     }
}
Example #3
0
/* Tags var with the uniq filter id if a threshold is configured and
 * the value returned by uniq_phred_from_var() is below it.
 */
void apply_uniq_threshold(var_t *var, uniq_filter_t *uniq_filter)
{
     /* short-circuit keeps uniq_phred_from_var() from being called when
      * no threshold is set */
     if (uniq_filter->thresh
         && uniq_phred_from_var(var) < uniq_filter->thresh) {
          vcf_var_add_to_filter(var, uniq_filter->id);
     }
}
Example #4
0
/* Tags an indel with the indel-quality filter id if a threshold is
 * configured and the variant's quality is set (> -1) but below that
 * threshold. The variant is expected to carry the "INDEL" INFO key.
 */
void apply_indelqual_threshold(var_t *var, indelqual_filter_t *indelqual_filter)
{
     assert (vcf_var_has_info_key(NULL, var, "INDEL"));

     if (indelqual_filter->thresh
         && var->qual>-1
         && var->qual<indelqual_filter->thresh) {
          vcf_var_add_to_filter(var, indelqual_filter->id);
     }
}
Example #5
0
/* Marks var with the coverage min/max filter ids if the value of its
 * INFO "DP" key lies outside the range configured in dp_filter.
 *
 * A bound is only active if it is > 0. If the DP tag is missing a
 * warning is logged once and coverage filtering is disabled for all
 * following calls (via dp_missing_warning_printed). A conversion error
 * reported through errno terminates the program.
 */
void apply_dp_filter(var_t *var, dp_filter_t *dp_filter)
{
     char *dp_char = NULL;
     int cov;

     /* a previous failure switched coverage filtering off */
     if (dp_missing_warning_printed) {
          return;
     }

     /* nothing to do unless at least one bound was requested */
     if ( ! (dp_filter->min > 0 || dp_filter->max > 0)) {
          return;
     }

     if ( ! vcf_var_has_info_key(&dp_char, var, "DP")) {
#ifdef DEBUG
          vcf_file_t f; f.fh = stderr; f.gz = 0; vcf_write_var(&f, var);
#endif
          LOG_WARN("%s\n", "Requested coverage filtering failed since DP tag is missing in variant");
          dp_missing_warning_printed = 1;
          /* always return here: dp_char is not usable below */
          return;
     }

     errno = 0;
     /*cov = atoi(dp_char);*/
     cov = strtol(dp_char, (char **) NULL, 10);
     if (errno) {
          LOG_FATAL("%s\n", "error during int conversion");
          free(dp_char);
          exit(1);
     }
     free(dp_char);

     if (dp_filter->min > 0 && cov < dp_filter->min) {
          vcf_var_add_to_filter(var, dp_filter->id_min);
     }
     if (dp_filter->max > 0 && cov > dp_filter->max) {
          vcf_var_add_to_filter(var, dp_filter->id_max);
     }
}
Example #6
0
/* Applies the uniq filter with multiple testing correction to all
 * num_vars variants in vars.
 *
 * Everything that is not significant after correction is tagged with
 * uniq_filter->id. If uniq_filter->ntests is 0 it is set to num_vars.
 *
 * Returns 0 on success and -1 on error (unknown MTC type). Exits if
 * memory cannot be allocated.
 *
 * FIXME should be part of lofreq filter.
 */
int 
apply_uniq_filter_mtc(uniq_filter_t *uniq_filter, var_t **vars, const int num_vars)
{
     double *uniq_probs = NULL;
     int fdr_applied = 0;
     int i;

     if (uniq_filter->ntests && num_vars > uniq_filter->ntests) {
         LOG_WARN("%s\n", "Number of variants larger than number of predefined tests for uniq filter! Are you sure that makes sense?");
     }

     if (! uniq_filter->ntests) {
          uniq_filter->ntests = num_vars;
     }

     /* nothing to test; also avoids malloc(0), which may return NULL
      * and would then be misreported as out of memory */
     if (num_vars <= 0) {
          return 0;
     }

     /* collect uniq error probs
      */
     uniq_probs = malloc(num_vars * sizeof(double));
     if ( ! uniq_probs) {
          LOG_FATAL("%s\n", "out of memory");
          exit(1);
     }
     for (i=0; i<num_vars; i++) {
          uniq_probs[i] = PHREDQUAL_TO_PROB(uniq_phred_from_var(vars[i]));
     }

     /* multiple testing correction
      */
     if (uniq_filter->mtc_type == MTC_BONF) {
          bonf_corr(uniq_probs, num_vars, 
                    uniq_filter->ntests);
          
     } else if (uniq_filter->mtc_type == MTC_HOLMBONF) {
          holm_bonf_corr(uniq_probs, num_vars, 
                         uniq_filter->alpha, uniq_filter->ntests);
          
     } else if (uniq_filter->mtc_type == MTC_FDR) {
          int num_rej = 0;
          long int *idx_rej = NULL; /* indices of rejected i.e. significant values */
          
          num_rej = fdr(uniq_probs, num_vars, 
                        uniq_filter->alpha, uniq_filter->ntests, 
                        &idx_rej);
          /* mark the significant ones; everything else counts as not
           * significant below, no matter what its raw value is */
          for (i=0; i<num_rej; i++) {
               long int idx = idx_rej[i];
               uniq_probs[idx] = -1;
          }
          free(idx_rej);
          fdr_applied = 1;
          
     } else {
          LOG_FATAL("Internal error: unknown MTC type %d\n", uniq_filter->mtc_type);
          free(uniq_probs);
          return -1;
     }

     for (i=0; i<num_vars; i++) {
          int not_significant;

          if (fdr_applied) {
               /* only entries marked with -1 above were rejected by fdr */
               not_significant = (uniq_probs[i] >= 0);
          } else {
               not_significant = (uniq_probs[i] > uniq_filter->alpha);
          }
          if (not_significant) {
               vcf_var_add_to_filter(vars[i], uniq_filter->id);
          }
     }

     free(uniq_probs);

     return 0;
}
Example #7
0
/* Entry point of the filter command.
 *
 * Parses the command line (argv[0] is skipped, see getopt_long call),
 * validates the resulting filter configuration, optionally runs a first
 * pass over the input VCF to compute multiple testing corrections and
 * then reads the input VCF variant by variant, applies all requested
 * filters and writes the result to the output VCF ("-" if no output
 * was given).
 *
 * Returns 0 on success and a non-zero value (1 or -1) on error.
 */
int
main_filter(int argc, char *argv[])
{
     filter_conf_t cfg;
     char *vcf_in = NULL, *vcf_out = NULL;
     static int print_only_passed = 1;
     static int sb_filter_no_compound = 0;
     static int sb_filter_incl_indels = 0;
     static int only_indels = 0;
     static int only_snvs = 0;
     char *vcf_header = NULL;
     mtc_qual_t *mtc_quals = NULL;
     long int num_vars;
     static int no_defaults = 0;
     long int var_idx = -1;

     /* default filter options */
     memset(&cfg, 0, sizeof(filter_conf_t));
     cfg.dp_filter.min = cfg.dp_filter.max = -1;
     cfg.af_filter.min = cfg.af_filter.max = -1;
     cfg.sb_filter.alpha = DEFAULT_SIG;
     cfg.snvqual_filter.alpha = DEFAULT_SIG;
     cfg.indelqual_filter.alpha = DEFAULT_SIG;


    /* keep in sync with long_opts_str and usage
     *
     * getopt is a pain in the neck when it comes to syncing of long
     * and short args and usage. check out gopt, libcfu...
     */
    while (1) {
         int c;
         static struct option long_opts[] = {
              /* see usage sync */
              {"verbose", no_argument, &verbose, 1},
              {"debug", no_argument, &debug, 1},
              {"print-all", no_argument, &print_only_passed, 0},
              {"no-defaults", no_argument, &no_defaults, 1},
              {"only-indels", no_argument, &only_indels, 1},
              {"only-snvs", no_argument, &only_snvs, 1},

              {"help", no_argument, NULL, 'h'},
              {"in", required_argument, NULL, 'i'},
              {"out", required_argument, NULL, 'o'},

              {"cov-min", required_argument, NULL, 'v'},
              {"cov-max", required_argument, NULL, 'V'},

              {"af-min", required_argument, NULL, 'a'},
              {"af-max", required_argument, NULL, 'A'},

              {"sb-thresh", required_argument, NULL, 'B'},
              {"sb-mtc", required_argument, NULL, 'b'},
              {"sb-alpha", required_argument, NULL, 'c'},
              {"sb-no-compound", no_argument, &sb_filter_no_compound, 1},
              {"sb-incl-indels", no_argument, &sb_filter_incl_indels, 1},

              {"snvqual-thresh", required_argument, NULL, 'Q'},
              {"snvqual-mtc", required_argument, NULL, 'q'},
              {"snvqual-alpha", required_argument, NULL, 'r'},
              {"snvqual-ntests", required_argument, NULL, 's'},

              {"indelqual-thresh", required_argument, NULL, 'K'},
              {"indelqual-mtc", required_argument, NULL, 'k'},
              {"indelqual-alpha", required_argument, NULL, 'l'},
              {"indelqual-ntests", required_argument, NULL, 'm'},

              {0, 0, 0, 0} /* sentinel */
         };

         /* keep in sync with long_opts and usage */
         static const char *long_opts_str = "hi:o:v:V:a:A:B:b:c:Q:q:r:s:K:k:l:m:";

         /* getopt_long stores the option index here. */
         int long_opts_index = 0;
         c = getopt_long(argc-1, argv+1, /* skipping 'lofreq', just leaving 'command', i.e. call */
                         long_opts_str, long_opts, & long_opts_index);
         if (c == -1) {
              break;
         }

         /* note on the isdigit() calls below: the argument is cast to
          * unsigned char since passing a negative char value is
          * undefined behaviour */
         switch (c) {
         /* keep in sync with long_opts etc */
         case 'h':
              usage(& cfg);
              return 0;

         case 'i':
              vcf_in = strdup(optarg);
              break;
         case 'o':
              if (0 != strcmp(optarg, "-")) {
                   if (file_exists(optarg)) {
                        LOG_FATAL("Cowardly refusing to overwrite file '%s'. Exiting...\n", optarg);
                        return 1;
                   }
              }
              vcf_out = strdup(optarg);
              break;

         case 'v':
              if (! isdigit((unsigned char) optarg[0])) {
                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
                   return -1;
              }
              cfg.dp_filter.min = atoi(optarg);
              break;
         case 'V':
              if (! isdigit((unsigned char) optarg[0])) {
                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
                   return -1;
              }
              cfg.dp_filter.max = atoi(optarg);
              break;

         case 'a':
              if (! isdigit((unsigned char) optarg[0])) {
                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
                   return -1;
              }
              cfg.af_filter.min = strtof(optarg, NULL);
              break;
         case 'A':
              if (! isdigit((unsigned char) optarg[0])) {
                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
                   return -1;
              }
              cfg.af_filter.max = strtof(optarg, NULL);
              break;

         case 'B':
              if (! isdigit((unsigned char) optarg[0])) {
                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
                   return -1;
              }
              cfg.sb_filter.thresh = atoi(optarg);
              break;
         case 'b':
              cfg.sb_filter.mtc_type = mtc_str_to_type(optarg);
              if (-1 == cfg.sb_filter.mtc_type) {
                   LOG_FATAL("Unknown multiple testing correction type '%s' for strandbias filtering\n", optarg);
                   return -1;
              }
              break;
         case 'c':
              if (! isdigit((unsigned char) optarg[0])) {
                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
                   return -1;
              }
              cfg.sb_filter.alpha = strtof(optarg, NULL);
              break;

         case 'Q':
              if (! isdigit((unsigned char) optarg[0])) {
                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
                   return -1;
              }
              cfg.snvqual_filter.thresh = atoi(optarg);
              break;
         case 'q':
              cfg.snvqual_filter.mtc_type = mtc_str_to_type(optarg);
              if (-1 == cfg.snvqual_filter.mtc_type) {
                   LOG_FATAL("Unknown multiple testing correction type '%s' for snv quality filtering\n", optarg);
                   return -1;
              }
              break;
         case 'r':
              if (! isdigit((unsigned char) optarg[0])) {
                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
                   return -1;
              }
              cfg.snvqual_filter.alpha = strtof(optarg, NULL);
              break;
         case 's':
              if (! isdigit((unsigned char) optarg[0])) {
                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
                   return -1;
              }
              cfg.snvqual_filter.ntests = atol(optarg);
              break;

         case 'K':
              if (! isdigit((unsigned char) optarg[0])) {
                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
                   return -1;
              }
              cfg.indelqual_filter.thresh = atoi(optarg);
              break;
         case 'k':
              cfg.indelqual_filter.mtc_type = mtc_str_to_type(optarg);
              if (-1 == cfg.indelqual_filter.mtc_type) {
                   LOG_FATAL("Unknown multiple testing correction type '%s' for indel quality filtering\n", optarg);
                   return -1;
              }
              break;
         case 'l':
              if (! isdigit((unsigned char) optarg[0])) {
                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
                   return -1;
              }
              cfg.indelqual_filter.alpha = strtof(optarg, NULL);
              break;
         case 'm':
              if (! isdigit((unsigned char) optarg[0])) {
                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
                   return -1;
              }
              cfg.indelqual_filter.ntests = atol(optarg);
              break;

         case '?':
              LOG_FATAL("%s\n", "Unrecognized argument found. Exiting...\n");
              return 1;

         default:
              break;
         }
    }
    /* copy the flags set directly by getopt_long into the config */
    cfg.print_only_passed = print_only_passed;
    cfg.only_indels = only_indels;
    cfg.only_snvs = only_snvs;
    cfg.sb_filter.no_compound = sb_filter_no_compound;
    cfg.sb_filter.incl_indels = sb_filter_incl_indels;

    if (cfg.only_indels && cfg.only_snvs) {
         LOG_FATAL("%s\n", "Can't keep only indels and only snvs");
         return 1;
    }
    
    if (! no_defaults) {
         if (cfg.sb_filter.mtc_type==MTC_NONE && ! cfg.sb_filter.thresh) {
              LOG_VERBOSE("%s\n", "Setting default SB filtering method to FDR");
              cfg.sb_filter.mtc_type = MTC_FDR;
              cfg.sb_filter.alpha = 0.001;
         }
         if (cfg.dp_filter.min<0) {
              cfg.dp_filter.min = 10;
              LOG_VERBOSE("Setting default minimum coverage to %d\n", cfg.dp_filter.min);
         }
    } else {
         LOG_VERBOSE("%s\n", "Skipping default settings");
    }

    if (0 != argc - optind - 1) {/* FIXME needed at all? */
         LOG_FATAL("%s\n", "Unrecognized argument found. Exiting...\n");
         return 1;
    }

    /* logic check of command line parameters
     */
    if (cfg.dp_filter.max > 0 &&  cfg.dp_filter.max < cfg.dp_filter.min) {
         LOG_FATAL("%s\n", "Invalid coverage-filter settings");
         return 1;
    }
    if ((cfg.af_filter.max > 0 && cfg.af_filter.max < cfg.af_filter.min) ||
        (cfg.af_filter.max > 1.0)) {
         LOG_FATAL("%s\n", "Invalid AF-filter settings");
         return 1;
    }

    if (cfg.sb_filter.thresh && cfg.sb_filter.mtc_type != MTC_NONE) {
         LOG_FATAL("%s\n", "Can't use fixed strand-bias threshold *and* multiple testing correction.");
         return 1;
    }
    if (cfg.snvqual_filter.thresh && cfg.snvqual_filter.mtc_type != MTC_NONE) {
         LOG_FATAL("%s\n", "Can't use fixed SNV quality threshold *and* multiple testing correction.");
         return 1;
    }
    if (cfg.indelqual_filter.thresh && cfg.indelqual_filter.mtc_type != MTC_NONE) {
         LOG_FATAL("%s\n", "Can't use fixed indel quality threshold *and* multiple testing correction.");
         return 1;
    }

    if (argc == 2) {
        fprintf(stderr, "\n");
        usage(& cfg);
        return 1;
    }

    if (debug) {
         dump_filter_conf(& cfg);
     }

    /* a missing output file arg defaults to stdout ("-")
     */
    /* no streaming allowed for vcf_in: we need to determine thresholds first */
    if  (! vcf_in) {
         LOG_FATAL("%s\n", "Input VCF missing. No streaming allowed. Need to determine auto threshold in memory friendly manner first.");
         return 1;
    }
    if  (! vcf_out) {
         vcf_out = strdup("-");
         if (! vcf_out) {
              LOG_FATAL("%s\n", "out of memory");
              return 1;
         }
    }
    LOG_DEBUG("vcf_in=%s vcf_out=%s\n", vcf_in, vcf_out);



    /* First pass parsing to get qualities for MTC computation (if needed)
     */
    if (cfg.sb_filter.mtc_type != MTC_NONE || cfg.snvqual_filter.mtc_type != MTC_NONE || cfg.indelqual_filter.mtc_type != MTC_NONE) {
#ifdef TRACE
         long int i = 0;
#endif
         LOG_VERBOSE("%s\n", "At least one type of multiple testing correction requested. Doing first pass of vcf");

         if ((num_vars = mtc_quals_from_vcf_file(& mtc_quals, vcf_in)) < 0) {
              LOG_ERROR("Couldn't parse %s\n", vcf_in);
              return 1;
         }

         if (cfg.sb_filter.mtc_type != MTC_NONE) {
              if (apply_sb_filter_mtc(mtc_quals, & cfg.sb_filter, num_vars)) {
                   LOG_FATAL("%s\n", "Multiple testing correction on strand-bias pvalues failed");
                   return -1;
              }
         }
         if (cfg.indelqual_filter.mtc_type != MTC_NONE) {
              if (apply_indelqual_filter_mtc(mtc_quals, & cfg.indelqual_filter, num_vars)) {
                   LOG_FATAL("%s\n", "Multiple testing correction on indel quality pvalues failed");
                   return -1;
              }
         }
         if (cfg.snvqual_filter.mtc_type != MTC_NONE) {
              if (apply_snvqual_filter_mtc(mtc_quals, & cfg.snvqual_filter, num_vars)) {
                   LOG_FATAL("%s\n", "Multiple testing correction on SNV quality pvalues failed");
                   return -1;
              }
         }
#ifdef TRACE
         for (i=0; i<num_vars; i++) {
              LOG_WARN("mtc_quals #%ld sb_qual=%d var_qual=%d is_indel=%d\n", 
                       i, mtc_quals[i].sb_qual, mtc_quals[i].var_qual, mtc_quals[i].is_indel);
         }
#endif
         LOG_VERBOSE("%s\n", "MTC application completed");
    } else {
         LOG_VERBOSE("%s\n", "No multiple testing correction requested. First pass of vcf skipped");

    }

    
    if (vcf_file_open(& cfg.vcf_in, vcf_in,
                      HAS_GZIP_EXT(vcf_in), 'r')) {
         LOG_ERROR("Couldn't open %s\n", vcf_in);
         return 1;
    }
    if (vcf_file_open(& cfg.vcf_out, vcf_out,
                      HAS_GZIP_EXT(vcf_out), 'w')) {
         LOG_ERROR("Couldn't open %s\n", vcf_out);
         return 1;
    }
    free(vcf_in);
    free(vcf_out);

    /* print header
     */
    if (0 !=  vcf_parse_header(&vcf_header, & cfg.vcf_in)) {
         /* LOG_WARN("%s\n", "vcf_parse_header() failed"); */
         if (vcf_file_seek(& cfg.vcf_in, 0, SEEK_SET)) {
              LOG_FATAL("%s\n", "Couldn't rewind file to parse variants"
                        " after header parsing failed");
              return -1;
         }
    }
    /* also sets filter names */
    cfg_filter_to_vcf_header(& cfg, &vcf_header);
    vcf_write_header(& cfg.vcf_out, vcf_header);
    free(vcf_header);


    /* read in variants
     */
    while (1) {
         var_t *var;
         int rc;
         int is_indel = 0;

         vcf_new_var(&var);
         rc = vcf_parse_var(& cfg.vcf_in, var);
         if (rc) {
              /* how to distinguish between error and EOF? */
              /* release the variant allocated for this iteration */
              vcf_free_var(&var);
              break;
         }
         /* counts every parsed variant (also skipped ones) since it is
          * used as index into mtc_quals below */
         var_idx += 1;

         is_indel = vcf_var_is_indel(var);

         if (cfg.only_snvs && is_indel) {
              vcf_free_var(&var);
              continue;
         } else if (cfg.only_indels && ! is_indel) {
              vcf_free_var(&var);
              continue;
         }


         /* filters applying to all types of variants
          */
         apply_af_filter(var, & cfg.af_filter);
         apply_dp_filter(var, & cfg.dp_filter);

         /* quality threshold per variant type
          */
         if (! is_indel) {
              if (cfg.snvqual_filter.thresh) {
                   assert(cfg.snvqual_filter.mtc_type == MTC_NONE);
                   apply_snvqual_threshold(var, & cfg.snvqual_filter);
              } else if (cfg.snvqual_filter.mtc_type != MTC_NONE) {
                   if (mtc_quals[var_idx].var_qual != -1) {
                        vcf_var_add_to_filter(var, cfg.snvqual_filter.id);
                   }
              }

         } else {
              if (cfg.indelqual_filter.thresh) {
                   assert(cfg.indelqual_filter.mtc_type == MTC_NONE);
                   apply_indelqual_threshold(var, & cfg.indelqual_filter);
              } else if (cfg.indelqual_filter.mtc_type != MTC_NONE) {
                   if (mtc_quals[var_idx].var_qual != -1) {
                        vcf_var_add_to_filter(var, cfg.indelqual_filter.id);
                   }
              }
         }
         
         /* sb filter 
          */
         if (cfg.sb_filter.thresh) {
              if (! is_indel || cfg.sb_filter.incl_indels) {
                   assert(cfg.sb_filter.mtc_type == MTC_NONE);
                   apply_sb_threshold(var, & cfg.sb_filter);
              }
         } else if (cfg.sb_filter.mtc_type != MTC_NONE) {
              if (! is_indel || cfg.sb_filter.incl_indels) {
                   if (mtc_quals[var_idx].sb_qual == -1) {
                        vcf_var_add_to_filter(var, cfg.sb_filter.id);
                   }
              }              
         }
         

         /* output
          */
         if (cfg.print_only_passed && ! (VCF_VAR_PASSES(var))) {
              vcf_free_var(&var);
              continue;
         }

         /* add pass if no filters were set */
         if (! var->filter || strlen(var->filter)<=1) {
              char pass_str[] = "PASS";
              if (var->filter) {
                   free(var->filter);
              }
              var->filter = strdup(pass_str);
         }

         vcf_write_var(& cfg.vcf_out, var);
         vcf_free_var(&var);

         if (var_idx%1000==0) {
              (void) vcf_file_flush(& cfg.vcf_out);
         }
    }

    vcf_file_close(& cfg.vcf_in);
    vcf_file_close(& cfg.vcf_out);

    free(mtc_quals);

    LOG_VERBOSE("%s\n", "Successful exit.");

    return 0;
}
Example #8
0
/* Applies the strand-bias filter with multiple testing correction to
 * the num_vars variants in vars.
 *
 * Everything that's significant after correction is tagged with
 * sb_filter->id (unless compound filtering is on and
 * alt_mostly_on_one_strand() does not hold for the variant). Variants
 * without "SB" INFO key are ignored, as are indels unless
 * sb_filter->incl_indels is set. If sb_filter->ntests is 0 it is set
 * to the number of variants actually tested.
 *
 * Returns 0 on success and -1 on error (out of memory or unknown MTC
 * type).
 *
 * very similar to in apply_snvqual_filter_mtc, but reverse logic and looking at all vars
 */
int apply_sb_filter_mtc(sb_filter_t *sb_filter, var_t **vars, const long int num_vars)
{
     double *sb_probs = NULL;
     long int *orig_idx = NULL;/* we might ignore some variants (missing values etc). keep track of real indices of kept vars */
     long int num_kept = 0;
     long int i;
     void *shrunk = NULL;
     int rc = -1;

     /* nothing to test; also avoids malloc(0), which may return NULL
      * and would then be misreported as out of memory */
     if (num_vars <= 0) {
          return 0;
     }

     /* collect values from vars kept in mem
      */
     sb_probs = malloc(num_vars * sizeof(double));
     orig_idx = malloc(num_vars * sizeof(long int));
     if ( ! sb_probs || ! orig_idx) {
          LOG_FATAL("%s\n", "out of memory");
          goto cleanup;
     }

     for (i=0; i<num_vars; i++) {
          char *sb_char = NULL;
          
          /* ignore indels too if sb filter is not to be applied */
          if (! sb_filter->incl_indels && vcf_var_is_indel(vars[i])) {
               continue;
          }

          if ( ! vcf_var_has_info_key(&sb_char, vars[i], "SB")) {
               if ( ! sb_missing_warning_printed) {
                    LOG_WARN("%s\n", "At least one variant has no SB tag! SB filtering will be incomplete");
                    sb_missing_warning_printed = 1;
               }
               continue;
          }

          sb_probs[num_kept] = PHREDQUAL_TO_PROB(atoi(sb_char));
          orig_idx[num_kept] = i;
          num_kept += 1;
          free(sb_char);
     }
     if (num_kept <= 0) {
          rc = 0;
          goto cleanup;
     }


     /* realloc to smaller size apparently not guaranteed to free up
      * space so no point really but let's make sure we don't use that
      * memory. Result goes through a temporary so that the original
      * buffer can still be freed if realloc fails */
     shrunk = realloc(sb_probs, num_kept * sizeof(double));
     if (! shrunk) {
          LOG_FATAL("%s\n", "realloc failed. Exiting...");
          goto cleanup;
     }
     sb_probs = shrunk;
     shrunk = realloc(orig_idx, num_kept * sizeof(long int));
     if (! shrunk) {
          LOG_FATAL("%s\n", "realloc failed. Exiting...");
          goto cleanup;
     }
     orig_idx = shrunk;

     if (! sb_filter->ntests) {
          sb_filter->ntests = num_kept;
     } else {
          if (num_kept > sb_filter->ntests) {
               LOG_WARN("%s\n", "Number of variants larger than number of predefined tests for SB filter! Are you sure that makes sense?");
          }
     }


     /* multiple testing correction
      */
     if (sb_filter->mtc_type == MTC_BONF) {
          bonf_corr(sb_probs, num_kept, 
                    sb_filter->ntests);
          
     } else if (sb_filter->mtc_type == MTC_HOLMBONF) {
          holm_bonf_corr(sb_probs, num_kept, 
                         sb_filter->alpha, sb_filter->ntests);
          
     } else if (sb_filter->mtc_type == MTC_FDR) {
          long int num_rej = 0;
          long int *idx_rej = NULL; /* indices of rejected i.e. significant values */
          
          num_rej = fdr(sb_probs, num_kept, 
                        sb_filter->alpha, sb_filter->ntests, 
                        &idx_rej);

          /* first pretend none are significant */
          for (i=0; i<num_kept; i++) {
               sb_probs[i] = DBL_MAX;
          }
          LOG_DEBUG("%ld results significant after fdr\n", num_rej);
          for (i=0; i<num_rej; i++) {
               long int idx = idx_rej[i];
               sb_probs[idx] = -1;
          }
          free(idx_rej);
          
     } else {
          LOG_FATAL("Internal error: unknown MTC type %d\n", sb_filter->mtc_type);
          goto cleanup;
     }
     
     for (i=0; i<num_kept; i++) {
          if (sb_probs[i] < sb_filter->alpha) {
               if (sb_filter->no_compound || alt_mostly_on_one_strand(vars[orig_idx[i]])) {
                    vcf_var_add_to_filter(vars[orig_idx[i]], sb_filter->id);
               }
          }
     }
     rc = 0;

cleanup:
     /* free(NULL) is a no-op, so this is safe on all paths */
     free(orig_idx);
     free(sb_probs);

     return rc;
}
Example #9
0
/* Applies the indel quality filter with multiple testing correction to
 * the num_vars variants in vars.
 *
 * Only variants with a quality (> -1) that carry the "INDEL" INFO key
 * are tested. Of those, everything that's not significant after
 * correction is tagged with indelqual_filter->id. If
 * indelqual_filter->ntests is 0 it is set to the number of tested
 * variants.
 *
 * Returns 0 on success and -1 on error (out of memory or unknown MTC
 * type).
 * 
 * Very similar to apply_sb_filter_mtc, but reverse testing logic and only looking at non consvars
 *
 */
int apply_indelqual_filter_mtc(indelqual_filter_t *indelqual_filter, var_t **vars, const long int num_vars)
{
     /* can only apply this logic to variants that are not consensus
      * variants, i.e those that actually have a quality. therefore
      * keep track of non cons var indices */
     long int *orig_idx = NULL; /* of size num_noncons_vars */
     double *noncons_errprobs = NULL;
     long int num_noncons_vars = 0;
     long int i;
     void *shrunk = NULL;
     int rc = -1;

     /* FIXME function almost identical to apply_snvqual_filter_mtc just different filter can be easily merged by accepting both types of variants */

     /* nothing to test; also avoids malloc(0), which may return NULL
      * and would then be misreported as out of memory */
     if (num_vars <= 0) {
          return 0;
     }

     /* collect values from noncons vars only and keep track of their indices
      */
     orig_idx = malloc(num_vars * sizeof(long int));
     noncons_errprobs = malloc(num_vars * sizeof(double));
     if ( ! orig_idx || ! noncons_errprobs) {
          LOG_FATAL("%s\n", "out of memory");
          goto cleanup;
     }

     for (i=0; i<num_vars; i++) {
          if (vars[i]->qual>-1 && vcf_var_has_info_key(NULL, vars[i], "INDEL")) {
               noncons_errprobs[num_noncons_vars] = PHREDQUAL_TO_PROB(vars[i]->qual);
               orig_idx[num_noncons_vars] = i;
               num_noncons_vars += 1;
          }
     }
     if (! num_noncons_vars) {
          rc = 0;
          goto cleanup;
     }

     if (indelqual_filter->ntests && num_noncons_vars > indelqual_filter->ntests) {
          LOG_WARN("Number of (non consensus) variants larger than number of predefined tests for indelqual filter (%ld > %ld)! Are you sure that makes sense?\n", 
                   num_noncons_vars, indelqual_filter->ntests);
     }

     /* shrink to the used size. Result goes through a temporary so
      * that the original buffer can still be freed if realloc fails */
     shrunk = realloc(orig_idx, (num_noncons_vars * sizeof(long int)));
     if ( ! shrunk) {
          LOG_FATAL("%s\n", "out of memory");
          goto cleanup;
     }
     orig_idx = shrunk;
     shrunk = realloc(noncons_errprobs, (num_noncons_vars * sizeof(double)));
     if ( ! shrunk) {
          LOG_FATAL("%s\n", "out of memory");
          goto cleanup;
     }
     noncons_errprobs = shrunk;

     /* only now we can set the number of tests (if it wasn't set by
      * caller) */
     if (! indelqual_filter->ntests) {
          indelqual_filter->ntests = num_noncons_vars;
     }

     /* multiple testing correction
      */
     if (indelqual_filter->mtc_type == MTC_BONF) {
          bonf_corr(noncons_errprobs, num_noncons_vars, 
                    indelqual_filter->ntests);
          
     } else if (indelqual_filter->mtc_type == MTC_HOLMBONF) {
          holm_bonf_corr(noncons_errprobs, num_noncons_vars, 
                         indelqual_filter->alpha, indelqual_filter->ntests);
          
     } else if (indelqual_filter->mtc_type == MTC_FDR) {
          long int num_rej = 0;
          long int *idx_rej = NULL; /* indices of rejected i.e. significant values */
          

          num_rej = fdr(noncons_errprobs, num_noncons_vars, 
                        indelqual_filter->alpha, indelqual_filter->ntests, 
                        &idx_rej);

          /* first pretend none are significant */
          for (i=0; i<num_noncons_vars; i++) {
               noncons_errprobs[i] = DBL_MAX;
          }
          LOG_DEBUG("%ld results significant after fdr\n", num_rej);
          for (i=0; i<num_rej; i++) {
               long int idx = idx_rej[i];
               noncons_errprobs[idx] = -1;
          }
          free(idx_rej);
          
     } else {
          LOG_FATAL("Internal error: unknown MTC type %d\n", indelqual_filter->mtc_type);
          goto cleanup;
     }
     
     for (i=0; i<num_noncons_vars; i++) {
          if (noncons_errprobs[i] > indelqual_filter->alpha) {
               vcf_var_add_to_filter(vars[orig_idx[i]], indelqual_filter->id);
          }
     }
     rc = 0;

cleanup:
     /* free(NULL) is a no-op, so this is safe on all paths */
     free(orig_idx);
     free(noncons_errprobs);

     return rc;
}