/* Function to call genotypes from post probs NOTE: probs must be normalized, that is, sum (or logsum) to 1! geno : array with probs n_geno : number of genotypes in array log_scale : are the probs in log scale? N_prob_thresh : minimum prob to use data. If highest prob is lower, data set as missing call_prob_thresh : minimum prob to call a genotype. If highest prob is lower, leave geno as probs miss_data : how the missing data is handled 0 = missing data (all genot with equal prob) 1 = sample random genotype 2 = call the highest prob geno (since missing data, probably major/major) */ void call_geno(double *geno, int n_geno, bool log_scale, double N_prob_thresh, double call_prob_thresh, int miss_data){ if(N_prob_thresh > call_prob_thresh) error(__FUNCTION__, "missing data threshold must be smaller than calling genotype threshold!"); int max_pos = array_max_pos(geno, n_geno); int min_pos = array_min_pos(geno, n_geno); double max_pp = (log_scale ? exp(geno[max_pos]) : geno[max_pos]); // If missing data if(geno[min_pos] == geno[max_pos]){ if(miss_data == 0) max_pp = -1; else if(miss_data == 1) max_pos = rand() % 3; } if(max_pp < N_prob_thresh) for (int g = 0; g < n_geno; g++) geno[g] = (log_scale ? log((double) 1/n_geno) : (double) 1/n_geno); if(max_pp >= call_prob_thresh){ for (int g = 0; g < n_geno; g++) geno[g] = (log_scale ? -INF : 0); geno[max_pos] = (log_scale ? log(1) : 1); } }
/*
  Hard-call the genotype of every individual at one site, in place.

  site_gl : flat array of n_ind consecutive records of n_geno values each
            (presumably log-scaled genotype likelihoods, going by the name
            -- confirm against callers)
  n_ind   : number of individuals (records) in site_gl
  n_geno  : number of genotypes (values) per individual

  For each individual, the position reported by array_max_pos() is set to 0
  and every other position of that individual is set to -1e4.
*/
void call_geno(double *site_gl, int n_ind, int n_geno) {
  for (int ind = 0; ind < n_ind; ind++) {
    // Start of this individual's record inside the flat array
    double *ind_gl = &site_gl[ind*n_geno];

    // Pick the winner before the record is overwritten
    const int best = array_max_pos(ind_gl, n_geno);

    for (int g = 0; g < n_geno; g++)
      ind_gl[g] = -1e4;

    ind_gl[best] = 0;
  }
}