Example #1
0
/* Function to call genotypes from post probs (geno is modified in place)
   NOTE: probs must be normalized, that is, sum (or logsum) to 1!
      geno              : array with probs; overwritten with the result
      n_geno            : number of genotypes in array
                          (nothing is done if n_geno <= 0)
      log_scale         : are the probs in log scale?
      N_prob_thresh     : minimum prob to use data.
                          If highest prob is lower, data set as missing
                          (every genotype gets prob 1/n_geno)
      call_prob_thresh  : minimum prob to call a genotype.
                          If highest prob is lower, leave geno as probs;
                          otherwise the called genotype gets prob 1 and all
                          the others prob 0 (0 and -INF when in log scale)
      miss_data         : how the missing data (smallest prob equal to the
                          largest prob) is handled
          0 = missing data (highest prob is treated as -1)
          1 = sample random genotype, uniformly over all n_geno genotypes
              (uses rand(); this function does not seed it)
          2 = keep the position reported by array_max_pos
              (any other value behaves the same way)
      For 1 and 2 the selected genotype still has to pass both thresholds.

   N_prob_thresh must not be larger than call_prob_thresh; error() is
   invoked when it is.
*/
void call_geno(double *geno, int n_geno, bool log_scale, double N_prob_thresh, double call_prob_thresh, int miss_data){
  if(N_prob_thresh > call_prob_thresh)
    error(__FUNCTION__, "missing data threshold must be smaller than calling genotype threshold!");

  // Nothing to call; also keeps the modulo and the division below well defined
  if(n_geno <= 0)
    return;

  int max_pos = array_max_pos(geno, n_geno);
  int min_pos = array_min_pos(geno, n_geno);
  double max_pp = (log_scale ? exp(geno[max_pos]) : geno[max_pos]);

  // If missing data (smallest and largest prob are identical)
  if(geno[min_pos] == geno[max_pos]){
    if(miss_data == 0)
      max_pp = -1;                // -1 is below any non-negative threshold
    else if(miss_data == 1)
      max_pos = rand() % n_geno;  // draw among ALL genotypes, never past the array end
  }


  // Not enough support: flag as missing by making every genotype equally likely
  if(max_pp < N_prob_thresh){
    const double flat_pp = (log_scale ? log((double) 1/n_geno) : (double) 1/n_geno);

    for (int g = 0; g < n_geno; g++)
      geno[g] = flat_pp;
  }


  // Enough support: collapse the distribution onto the selected genotype
  if(max_pp >= call_prob_thresh){
    for (int g = 0; g < n_geno; g++)
      geno[g] = (log_scale ? -INF : 0);

    // prob 1, which is 0 in log scale
    geno[max_pos] = (log_scale ? 0 : 1);
  }
}
Example #2
0
/* Turn the values of every individual at a site into a hard call, in place.
   site_gl is read as n_ind consecutive groups of n_geno values (presumably
   log-scaled genotype likelihoods, judging by the name). Within each group
   the entry selected by array_max_pos is set to 0 and all the other entries
   are set to -1e4. */
void call_geno(double *site_gl, int n_ind, int n_geno) {
	int ind = 0;

	while (ind < n_ind) {
		/* Start of this individual's group of n_geno values */
		double *ind_gl = &site_gl[ind*n_geno];
		int best = array_max_pos(ind_gl, n_geno);
		int g;

		for (g = 0; g < n_geno; g++)
			ind_gl[g] = -1e4;
		ind_gl[best] = 0;

		ind++;
	}
}