Ejemplo n.º 1
0
/* Applies multiple testing correction to the uniq error probabilities
 * of all variants and marks every variant that is not significant
 * afterwards.
 *
 * uniq_filter: filter settings (mtc_type, alpha, ntests, id). If
 *              ntests is 0 it is set to num_vars.
 * vars:        variants to test. Variants whose corrected probability
 *              is larger than alpha get uniq_filter->id added to
 *              their filter.
 * num_vars:    number of entries in vars.
 *
 * Returns 0 on success and -1 on error (out of memory or unknown MTC
 * type).
 *
 * FIXME should be part of lofreq filter.
 *
 */
int 
apply_uniq_filter_mtc(uniq_filter_t *uniq_filter, var_t **vars, const int num_vars)
{
     double *uniq_probs = NULL;
     int rc = -1;
     int i;

     if (uniq_filter->ntests && num_vars > uniq_filter->ntests) {
          LOG_WARN("%s\n", "Number of variants larger than number of predefined tests for uniq filter! Are you sure that makes sense?");
     }

     if (! uniq_filter->ntests) {
          uniq_filter->ntests = num_vars;
     }

     /* nothing to test. also avoids malloc(0), which may return NULL
      * and would then be misreported as out of memory */
     if (num_vars <= 0) {
          return 0;
     }

     /* collect uniq error probs
      */
     uniq_probs = malloc(num_vars * sizeof *uniq_probs);
     if ( ! uniq_probs) {
          LOG_FATAL("%s\n", "out of memory");
          return -1;
     }
     for (i=0; i<num_vars; i++) {
          uniq_probs[i] = PHREDQUAL_TO_PROB(uniq_phred_from_var(vars[i]));
     }

     /* multiple testing correction
      */
     if (uniq_filter->mtc_type == MTC_BONF) {
          bonf_corr(uniq_probs, num_vars, 
                    uniq_filter->ntests);
          
     } else if (uniq_filter->mtc_type == MTC_HOLMBONF) {
          holm_bonf_corr(uniq_probs, num_vars, 
                         uniq_filter->alpha, uniq_filter->ntests);
          
     } else if (uniq_filter->mtc_type == MTC_FDR) {
          long int num_rej = 0;
          long int *idx_rej = NULL; /* indices of rejected i.e. significant values */
          long int j;
          
          num_rej = fdr(uniq_probs, num_vars, 
                        uniq_filter->alpha, uniq_filter->ntests, 
                        &idx_rej);

          /* only the indices returned by fdr() count as significant:
           * first pretend none are, so that a raw value below alpha
           * which was not rejected cannot pass the alpha test below */
          for (i=0; i<num_vars; i++) {
               uniq_probs[i] = DBL_MAX;
          }
          for (j=0; j<num_rej; j++) {
               uniq_probs[idx_rej[j]] = -1;
          }
          free(idx_rej);
          
     } else {
          LOG_FATAL("Internal error: unknown MTC type %d\n", uniq_filter->mtc_type);
          goto cleanup;
     }

     /* filter whatever is not significant after correction */
     for (i=0; i<num_vars; i++) {
          if (uniq_probs[i] > uniq_filter->alpha) {
               vcf_var_add_to_filter(vars[i], uniq_filter->id);
          }
     }
     rc = 0;

cleanup:
     free(uniq_probs);

     return rc;
}
Ejemplo n.º 2
0
/* Applies multiple testing correction to the strand bias qualities
 * stored in mtc_quals.
 *
 * mtc_quals: array of num_vars entries. Reads is_indel, sb_qual and
 *            is_alt_mostly_on_one_strand. sb_qual is set to -1 for
 *            entries that are significant after correction (i.e
 *            filter!), but only if sb_filter->no_compound is set or
 *            the entry's is_alt_mostly_on_one_strand is set.
 * sb_filter: filter settings (mtc_type, alpha, ntests, incl_indels,
 *            no_compound). If ntests is 0 it is set to the number of
 *            tested entries.
 * num_vars:  number of entries in mtc_quals.
 *
 * Indels are skipped unless sb_filter->incl_indels is set.
 *
 * Returns 0 on success (including nothing to test) and -1 on error
 * (out of memory or unknown MTC type).
 *
 */
int apply_sb_filter_mtc(mtc_qual_t *mtc_quals, sb_filter_t *sb_filter, const long int num_vars)
{
     double *sb_probs = NULL;
     long int *orig_idx = NULL;/* we might ignore some variants. keep track of real indices of kept vars */
     long int num_kept = 0;
     long int i;
     int rc = -1;

     /* nothing to test. also avoids malloc(0), which may return NULL
      * and would then be misreported as out of memory */
     if (num_vars <= 0) {
          return 0;
     }

     /* collect values from vars kept in mem
      */
     sb_probs = malloc(num_vars * sizeof *sb_probs);
     orig_idx = malloc(num_vars * sizeof *orig_idx);
     if ( ! sb_probs || ! orig_idx) {
          LOG_FATAL("%s\n", "out of memory");
          goto cleanup;
     }

     for (i=0; i<num_vars; i++) {
          /* skip indels unless the sb filter is to be applied to them as well */
          if (! sb_filter->incl_indels && mtc_quals[i].is_indel) {
               continue;
          }

          sb_probs[num_kept] = PHREDQUAL_TO_PROB(mtc_quals[i].sb_qual);
          orig_idx[num_kept] = i;
          num_kept += 1;
     }
     if (num_kept == 0) {
          rc = 0;
          goto cleanup;
     }

     /* only now we can set the number of tests (if it wasn't set by
      * caller) */
     if (! sb_filter->ntests) {
          sb_filter->ntests = num_kept;
     } else {
          if (num_kept > sb_filter->ntests) {
               LOG_WARN("Number of variants (%ld) in SB filter larger than the number of predefined tests (%ld). Are you sure that makes sense?\n",
                        num_kept, sb_filter->ntests);
          }
     }


     /* multiple testing correction
      */
     if (sb_filter->mtc_type == MTC_BONF) {
          bonf_corr(sb_probs, num_kept, 
                    sb_filter->ntests);
          
     } else if (sb_filter->mtc_type == MTC_HOLMBONF) {
          holm_bonf_corr(sb_probs, num_kept, 
                         sb_filter->alpha, sb_filter->ntests);
          
     } else if (sb_filter->mtc_type == MTC_FDR) {
          long int num_rej = 0;
          long int *idx_rej = NULL; /* indices of rejected i.e. significant values */
          
          num_rej = fdr(sb_probs, num_kept, 
                        sb_filter->alpha, sb_filter->ntests, 
                        &idx_rej);

          /* first pretend none are significant */
          for (i=0; i<num_kept; i++) {
               sb_probs[i] = DBL_MAX;
          }
          for (i=0; i<num_rej; i++) {
               sb_probs[idx_rej[i]] = -1;
          }
          free(idx_rej);
          
     } else {
          LOG_FATAL("Internal error: unknown MTC type %d\n", sb_filter->mtc_type);
          goto cleanup;
     }
     
     for (i=0; i<num_kept; i++) {
          /* note: reverse of qual filters, i.e. apply filter if significant, and not the other way around! */
          if (sb_probs[i] < sb_filter->alpha) {
               if (sb_filter->no_compound || mtc_quals[orig_idx[i]].is_alt_mostly_on_one_strand) {
                    mtc_quals[orig_idx[i]].sb_qual = -1;
               }
          }
     }
     rc = 0;

cleanup:
     free(orig_idx);
     free(sb_probs);

     return rc;
}
Ejemplo n.º 3
0
/* Applies multiple testing correction to the strand bias values
 * (INFO key "SB") of the given variants and filters everything that's
 * significant.
 *
 * sb_filter: filter settings (mtc_type, alpha, ntests, incl_indels,
 *            no_compound, id). If ntests is 0 it is set to the number
 *            of tested variants.
 * vars:      variants to test. Significant ones get sb_filter->id
 *            added to their filter, but only if sb_filter->no_compound
 *            is set or alt_mostly_on_one_strand() holds for them.
 * num_vars:  number of entries in vars.
 *
 * Indels are skipped unless sb_filter->incl_indels is set. Variants
 * without SB tag are skipped as well (a warning is printed once).
 *
 * Returns 0 on success (including nothing to test) and -1 on error
 * (out of memory or unknown MTC type).
 */
int apply_sb_filter_mtc(sb_filter_t *sb_filter, var_t **vars, const long int num_vars)
{
     double *sb_probs = NULL;
     double *probs_tmp = NULL;
     long int *orig_idx = NULL;/* we might ignore some variants (missing values etc). keep track of real indices of kept vars */
     long int *idx_tmp = NULL;
     long int num_kept = 0;
     long int i;
     int rc = -1;

     /* nothing to test. also avoids malloc(0), which may return NULL
      * and would then be misreported as out of memory */
     if (num_vars <= 0) {
          return 0;
     }

     /* collect values from vars kept in mem
      */
     sb_probs = malloc(num_vars * sizeof *sb_probs);
     orig_idx = malloc(num_vars * sizeof *orig_idx);
     if ( ! sb_probs || ! orig_idx) {
          LOG_FATAL("%s\n", "out of memory");
          goto cleanup;
     }

     for (i=0; i<num_vars; i++) {
          char *sb_char = NULL;
          
          /* skip indels unless the sb filter is to be applied to them as well */
          if (! sb_filter->incl_indels && vcf_var_is_indel(vars[i])) {
               continue;
          }

          if ( ! vcf_var_has_info_key(&sb_char, vars[i], "SB")) {
               if ( ! sb_missing_warning_printed) {
                    LOG_WARN("%s\n", "At least one variant has no SB tag! SB filtering will be incomplete");
                    sb_missing_warning_printed = 1;
               }
               continue;
          }

          sb_probs[num_kept] = PHREDQUAL_TO_PROB(atoi(sb_char));
          orig_idx[num_kept] = i;
          num_kept += 1;
          free(sb_char);
     }
     if (num_kept == 0) {
          rc = 0;
          goto cleanup;
     }


     /* realloc to smaller size apparently not guaranteed to free up
      * space so no point really but let's make sure we don't use that
      * memory. go through temporaries: on failure the old block is
      * still valid and has to be freed */
     probs_tmp = realloc(sb_probs, num_kept * sizeof *sb_probs);
     if (! probs_tmp) { LOG_FATAL("realloc failed. Exiting..."); goto cleanup; }
     sb_probs = probs_tmp;
     idx_tmp = realloc(orig_idx, num_kept * sizeof *orig_idx);
     if (! idx_tmp) { LOG_FATAL("realloc failed. Exiting..."); goto cleanup; }
     orig_idx = idx_tmp;

     /* only now we can set the number of tests (if it wasn't set by
      * caller) */
     if (! sb_filter->ntests) {
          sb_filter->ntests = num_kept;
     } else {
          if (num_kept > sb_filter->ntests) {
               LOG_WARN("%s\n", "Number of variants larger than number of predefined tests for SB filter! Are you sure that makes sense?");
          }
     }


     /* multiple testing correction
      */
     if (sb_filter->mtc_type == MTC_BONF) {
          bonf_corr(sb_probs, num_kept, 
                    sb_filter->ntests);
          
     } else if (sb_filter->mtc_type == MTC_HOLMBONF) {
          holm_bonf_corr(sb_probs, num_kept, 
                         sb_filter->alpha, sb_filter->ntests);
          
     } else if (sb_filter->mtc_type == MTC_FDR) {
          long int num_rej = 0;
          long int *idx_rej = NULL; /* indices of rejected i.e. significant values */
          
          num_rej = fdr(sb_probs, num_kept, 
                        sb_filter->alpha, sb_filter->ntests, 
                        &idx_rej);

          /* first pretend none are significant */
          for (i=0; i<num_kept; i++) {
               sb_probs[i] = DBL_MAX;
          }
          LOG_DEBUG("%ld results significant after fdr\n", num_rej);
          for (i=0; i<num_rej; i++) {
               sb_probs[idx_rej[i]] = -1;
          }
          free(idx_rej);
          
     } else {
          LOG_FATAL("Internal error: unknown MTC type %d\n", sb_filter->mtc_type);
          goto cleanup;
     }
     
     /* filter whatever is significant after correction */
     for (i=0; i<num_kept; i++) {
          if (sb_probs[i] < sb_filter->alpha) {
               if (sb_filter->no_compound || alt_mostly_on_one_strand(vars[orig_idx[i]])) {
                    vcf_var_add_to_filter(vars[orig_idx[i]], sb_filter->id);
               }
          }
     }
     rc = 0;

cleanup:
     free(orig_idx);
     free(sb_probs);

     return rc;
}
Ejemplo n.º 4
0
/* Applies multiple testing correction to the variant qualities of all
 * indels stored in mtc_quals.
 *
 * mtc_quals:        array of num_vars entries. Only entries with
 *                   is_indel set are tested. var_qual is set to -1
 *                   for entries that are significant after
 *                   correction; all other entries are left untouched.
 *                   NOTE(review): presumably the caller then filters
 *                   tested entries whose var_qual is not -1 - confirm
 *                   against caller.
 * indelqual_filter: filter settings (mtc_type, alpha, ntests). If
 *                   ntests is 0 it is set to the number of tested
 *                   entries.
 * num_vars:         number of entries in mtc_quals.
 *
 * Returns 0 on success (including nothing to test) and -1 on error
 * (out of memory or unknown MTC type).
 */
int apply_indelqual_filter_mtc(mtc_qual_t *mtc_quals, indelqual_filter_t *indelqual_filter,  const long int num_vars)
{
     long int *orig_idx = NULL; /* allocated for num_vars, first num_kept entries used */
     double *errprobs = NULL;
     long int num_kept = 0;
     long int i;
     int rc = -1;

     /* nothing to test. also avoids malloc(0), which may return NULL
      * and would then be misreported as out of memory */
     if (num_vars <= 0) {
          return 0;
     }
   
     /* collect values from indels only and keep track of their indices
      */
     errprobs = malloc(num_vars * sizeof *errprobs);
     orig_idx = malloc(num_vars * sizeof *orig_idx);
     if ( ! errprobs || ! orig_idx) {
          LOG_FATAL("%s\n", "out of memory");
          goto cleanup;
     }

     for (i=0; i<num_vars; i++) {
          if (! mtc_quals[i].is_indel) {
               continue;
          }
          errprobs[num_kept] = PHREDQUAL_TO_PROB(mtc_quals[i].var_qual);
          orig_idx[num_kept] = i;
          num_kept += 1;
     }
     if (num_kept == 0) {
          rc = 0;
          goto cleanup;
     }

     /* only now we can set the number of tests (if it wasn't set by
      * caller) */
     if (! indelqual_filter->ntests) {
          indelqual_filter->ntests = num_kept;
     } else {
          if (num_kept > indelqual_filter->ntests) {
               LOG_WARN("Number of variants (%ld) larger than the number of predefined tests (%ld). Are you sure that makes sense?\n",
                        num_kept, indelqual_filter->ntests);
          }
     }

     /* multiple testing correction
      */
     if (indelqual_filter->mtc_type == MTC_BONF) {
          bonf_corr(errprobs, num_kept, 
                    indelqual_filter->ntests);
          
     } else if (indelqual_filter->mtc_type == MTC_HOLMBONF) {
          holm_bonf_corr(errprobs, num_kept, 
                         indelqual_filter->alpha, indelqual_filter->ntests);
          
     } else if (indelqual_filter->mtc_type == MTC_FDR) {
          long int num_rej = 0;
          long int *idx_rej = NULL; /* indices of rejected i.e. significant values */
          
          num_rej = fdr(errprobs, num_kept, 
                        indelqual_filter->alpha, indelqual_filter->ntests, 
                        &idx_rej);
          /* first pretend none are significant */
          for (i=0; i<num_kept; i++) {
               errprobs[i] = DBL_MAX;
          }
          for (i=0; i<num_rej; i++) {
               errprobs[idx_rej[i]] = -1;
          }
          free(idx_rej);
          
     } else {
          LOG_FATAL("Internal error: unknown MTC type %d\n", indelqual_filter->mtc_type);
          goto cleanup;
     }
     
     /* mark whatever is significant after correction */
     for (i=0; i<num_kept; i++) {
          if (errprobs[i] < indelqual_filter->alpha) {
               mtc_quals[orig_idx[i]].var_qual = -1;
          }
     }
     rc = 0;

cleanup:
     free(orig_idx);
     free(errprobs);

     return rc;
}
Ejemplo n.º 5
0
/* Applies multiple testing correction to the qualities of all non
 * consensus indels and filters everything that's not significant.
 *
 * indelqual_filter: filter settings (mtc_type, alpha, ntests, id). If
 *                   ntests is 0 it is set to the number of tested
 *                   variants.
 * vars:             variants to test. Only variants with qual > -1
 *                   and INFO key "INDEL" are tested. Those whose
 *                   corrected error probability is larger than alpha
 *                   get indelqual_filter->id added to their filter.
 * num_vars:         number of entries in vars.
 *
 * Returns 0 on success (including nothing to test) and -1 on error
 * (out of memory or unknown MTC type).
 *
 */
int apply_indelqual_filter_mtc(indelqual_filter_t *indelqual_filter, var_t **vars, const long int num_vars)
{
     /* can only apply this logic to variants that are not consensus
      * variants, i.e those that actually have a quality. therefore
      * keep track of non cons var indices */
     long int *orig_idx = NULL; /* of size num_noncons_vars */
     long int *idx_tmp = NULL;
     double *noncons_errprobs = NULL;
     double *probs_tmp = NULL;
     long int num_noncons_vars = 0;
     long int i;
     int rc = -1;

     /* FIXME(review): presumably almost identical to
      * apply_snvqual_filter_mtc (the original note named this function
      * itself); just a different filter, so both can be easily merged
      * by accepting both types of variants - confirm */

     /* nothing to test. also avoids malloc(0), which may return NULL
      * and would then be misreported as out of memory */
     if (num_vars <= 0) {
          return 0;
     }

     /* collect values from noncons vars only and keep track of their indices
      */
     orig_idx = malloc(num_vars * sizeof *orig_idx);
     noncons_errprobs = malloc(num_vars * sizeof *noncons_errprobs);
     if ( ! orig_idx || ! noncons_errprobs) {
          LOG_FATAL("%s\n", "out of memory");
          goto cleanup;
     }

     for (i=0; i<num_vars; i++) {
          if (vars[i]->qual>-1 && vcf_var_has_info_key(NULL, vars[i], "INDEL")) {
               noncons_errprobs[num_noncons_vars] = PHREDQUAL_TO_PROB(vars[i]->qual);
               orig_idx[num_noncons_vars] = i;
               num_noncons_vars += 1;
          }
     }
     if (! num_noncons_vars) {
          rc = 0;
          goto cleanup;
     }

     if (indelqual_filter->ntests && num_noncons_vars > indelqual_filter->ntests) {
          LOG_WARN("Number of (non consensus) variants larger than number of predefined tests for indelqual filter (%ld > %ld)! Are you sure that makes sense?\n", 
                   num_noncons_vars, indelqual_filter->ntests);
     }

     /* shrink to the used size. go through temporaries: on failure
      * the old block is still valid and has to be freed */
     idx_tmp = realloc(orig_idx, num_noncons_vars * sizeof *orig_idx);
     if ( ! idx_tmp) { LOG_FATAL("%s\n", "out of memory"); goto cleanup; }
     orig_idx = idx_tmp;
     probs_tmp = realloc(noncons_errprobs, num_noncons_vars * sizeof *noncons_errprobs);
     if ( ! probs_tmp) { LOG_FATAL("%s\n", "out of memory"); goto cleanup; }
     noncons_errprobs = probs_tmp;

     /* only now we can set the number of tests (if it wasn't set by
      * caller) */
     if (! indelqual_filter->ntests) {
          indelqual_filter->ntests = num_noncons_vars;
     }

     /* multiple testing correction
      */
     if (indelqual_filter->mtc_type == MTC_BONF) {
          bonf_corr(noncons_errprobs, num_noncons_vars, 
                    indelqual_filter->ntests);
          
     } else if (indelqual_filter->mtc_type == MTC_HOLMBONF) {
          holm_bonf_corr(noncons_errprobs, num_noncons_vars, 
                         indelqual_filter->alpha, indelqual_filter->ntests);
          
     } else if (indelqual_filter->mtc_type == MTC_FDR) {
          long int num_rej = 0;
          long int *idx_rej = NULL; /* indices of rejected i.e. significant values */
          
          num_rej = fdr(noncons_errprobs, num_noncons_vars, 
                        indelqual_filter->alpha, indelqual_filter->ntests, 
                        &idx_rej);

          /* first pretend none are significant */
          for (i=0; i<num_noncons_vars; i++) {
               noncons_errprobs[i] = DBL_MAX;
          }
          LOG_DEBUG("%ld results significant after fdr\n", num_rej);
          for (i=0; i<num_rej; i++) {
               noncons_errprobs[idx_rej[i]] = -1;
          }
          free(idx_rej);
          
     } else {
          LOG_FATAL("Internal error: unknown MTC type %d\n", indelqual_filter->mtc_type);
          goto cleanup;
     }
     
     /* filter whatever is not significant after correction */
     for (i=0; i<num_noncons_vars; i++) {
          if (noncons_errprobs[i] > indelqual_filter->alpha) {
               vcf_var_add_to_filter(vars[orig_idx[i]], indelqual_filter->id);
          }
     }
     rc = 0;

cleanup:
     free(orig_idx);
     free(noncons_errprobs);

     return rc;
}