/*
  Finds the smallest inlier count that makes a bad RANSAC model sufficiently
  unlikely, as a function of the number of putative correspondences.

  Implements equation (7) of

  Chum, O. and Matas, J.  Matching with PROSAC -- Progressive Sample Consensus.
  In <EM>Conference on Computer Vision and Pattern Recognition (CVPR)</EM>,
  (2005), pp. 220--226.

  @param n number of putative correspondences
  @param m min number of correspondences to compute the model in question
  @param p_badsupp prob. that a bad model is supported by a correspondence
  @param p_badxform desired prob. that the final transformation returned is bad

  @return Returns the smallest candidate count j in [m+1, n] whose tail
    probability (summed over i = j..n) is below p_badxform.  If no candidate
    qualifies the loop runs off the end and n+1 is returned; if m+1 > n the
    loop never runs and m+1 is returned.
*/
static int calc_min_inliers( int n, int m, double p_badsupp, double p_badxform )
{
  /* The formula comes straight from the PROSAC paper cited above. */
  int candidate = m + 1;

  while( candidate <= n )
    {
      double tail_prob = 0;
      int k;

      for( k = candidate; k <= n; k++ )
        {
          /*
            Log of one binomial term; the three log_factorial() terms
            together are log( n-m choose k-m ).
          */
          double log_term = ( k - m ) * log( p_badsupp ) +
            ( n - k + m ) * log( 1.0 - p_badsupp ) +
            log_factorial( n - m ) - log_factorial( k - m ) -
            log_factorial( n - k );

          tail_prob += exp( log_term );
        }

      if( tail_prob < p_badxform )
        return candidate;

      candidate++;
    }

  return candidate;
}
//under a model where the first allele is the true one and second is seq error // epsilon = (1-e)^k // delta = e(1-e)^(k-1) // lambda = expected_covg/mean_read_len double get_biallelic_log_lik(Covg covg_model_true,//covg on allele the model says is true Covg covg_model_err, double lambda_g, double lambda_e, int kmer) { //Under this model, covg on th true allele is Poisson distributed //with rate at true allele r_t = (1-e)^k * depth/read-length =: lambda_g double r_t = lambda_g; // P(covg_model_true) = exp(-r_t) * (r_t)^covg_model_true /covg_model_true! double log_lik_true_allele = -r_t + covg_model_true*log(r_t) - log_factorial(covg_model_true); //Covg on the err allele is Poisson distributed with // rate at error allele = e * (1-e)^(k-1) * (D/R) /3 =: lambda_e double r_e = lambda_e; double log_lik_err_allele = -r_e + covg_model_err*log(r_e) - log_factorial(covg_model_err); return log_lik_true_allele+log_lik_err_allele; }
/*
 * Returns exp( log_factorial(alpha) + log_factorial(beta)
 *              - log_factorial(alpha + beta) ),
 * evaluated in log space so the intermediate factorials cannot overflow.
 *
 * NOTE(review): this equals the Beta function B(alpha, beta) only if
 * log_factorial(x) returns log Gamma(x); if it returns log(x!) the value is
 * alpha! * beta! / (alpha + beta)! instead -- confirm against log_factorial.
 */
double beta_func(double alpha, double beta)
{
    const double log_numerator = log_factorial(alpha) + log_factorial(beta);
    const double log_denominator = log_factorial(alpha+beta);

    return exp(log_numerator - log_denominator);
}
/*
 * Returns lgamma(k + r) - lgamma(r) - log_factorial(k), i.e. the log of
 * Gamma(k + r) / (Gamma(r) * k!) when log_factorial(k) is log(k!).
 * For k == 0 the result is 0 and neither lgamma nor log_factorial is called.
 */
double log_combination_k_r (unsigned int k, double r)
{
    if (k == 0)
    {
        return 0;
    }

    return lgamma (k + r) - lgamma (r) - log_factorial (k);
}
/**
 * Multinomial log-likelihood of the observed counts given expected
 * per-category probabilities:
 *
 *   log(n!) - sum_i log(observed[i]!) + sum_i observed[i] * log(expected[i])
 *
 * where n is the total of ALL entries of `observed` and the two sums skip the
 * category at HFLU_INDEX.
 *
 * A category with a zero observed count contributes nothing to the last sum
 * (the usual 0 * log 0 = 0 convention). Without that guard a zero count paired
 * with a zero expected probability evaluates 0 * -inf and turns the whole
 * result into NaN.
 *
 * NOTE(review): n includes the count at HFLU_INDEX although that category is
 * excluded from the per-category terms -- confirm this is intended.
 *
 * @param expected  expected probability of each category
 * @param observed  observed count of each category
 * @return the log-likelihood; -inf if a category with a positive count has
 *         expected probability 0.
 */
double calculate_likelihood(const std::array<double, INIT_NUM_STYPES + 1> &expected,
                            const std::array<int, INIT_NUM_STYPES + 1> &observed)
{
    const int n = std::accumulate(observed.begin(), observed.end(), 0);
    double log_likelihood = log_factorial(n);

    for (int i = 0; i < INIT_NUM_STYPES + 1; i++) {
        if (i == HFLU_INDEX) {
            continue;
        }

        const int count = observed[i];
        log_likelihood -= log_factorial(count);

        // Skip the log term for empty categories so that an expected
        // probability of 0 cannot produce 0 * -inf = NaN.
        if (count != 0) {
            log_likelihood += count * log(expected[i]);
        }
    }

    return log_likelihood;
}
/*
Computes the minimum number of inliers needed so that the probability that the
transformation finally computed by RANSAC is wrong stays below p_badxform.

Parameters:
  n:          number of putative correspondences
  m:          minimum number of correspondences needed to compute the model
  p_badsupp:  probability that a bad model is supported by a correspondence
  p_badxform: desired probability that the final transformation is bad

Returns: the smallest count j in [m+1, n] whose tail probability (summed over
  i = j..n) is below p_badxform.  If no such j exists the loop runs off the
  end and n+1 is returned; if m+1 > n the loop never runs and m+1 is returned.
*/
static int calc_min_inliers(int n, int m, double p_badsupp, double p_badxform)
{
	int candidate = m + 1;

	while (candidate <= n)
	{
		double tail_prob = 0;
		int k;

		for (k = candidate; k <= n; k++)
		{
			/* log of one binomial term; the three log_factorial() terms
			   together are log( n-m choose k-m ) */
			double log_term = (k - m) * log(p_badsupp) + (n - k + m) * log(1.0 - p_badsupp) +
				log_factorial(n - m) - log_factorial(k - m) -
				log_factorial(n - k);

			tail_prob += exp(log_term);
		}

		if (tail_prob < p_badxform)
			return candidate;

		candidate++;
	}

	return candidate;
}
/*
  Determines how many inliers a model must have before it can be trusted,
  as a function of the number of putative correspondences.

  Follows equation (7) in

  Chum, O. and Matas, J.  Matching with PROSAC -- Progressive Sample Consensus.
  In <EM>Conference on Computer Vision and Pattern Recognition (CVPR)</EM>,
  (2005), pp. 220--226.

  @param n number of putative correspondences
  @param m min number of correspondences to compute the model in question
  @param p_badsupp prob. that a bad model is supported by a correspondence
  @param p_badxform desired prob. that the final transformation returned is bad

  @return Returns the first count j, scanning upward from m+1 to n, for which
    the summed probability of i = j..n supporting correspondences is below
    p_badxform.  When no count qualifies the scan ends at n+1 and that value
    is returned; when m+1 > n nothing is scanned and m+1 is returned.
*/
static int calc_min_inliers( int n, int m, double p_badsupp, double p_badxform )
{
  int min_inliers = m + 1;

  while( min_inliers <= n )
    {
      double prob_at_least = 0;
      int support;

      for( support = min_inliers; support <= n; support++ )
        {
          /*
            The last three terms below are equivalent to
            log( n-m choose support-m ).
          */
          double log_prob = (support-m) * log( p_badsupp ) +
            (n-support+m) * log( 1.0 - p_badsupp ) +
            log_factorial( n - m ) - log_factorial( support - m ) -
            log_factorial( n - support );

          prob_at_least += exp( log_prob );
        }

      if( prob_at_least < p_badxform )
        return min_inliers;

      min_inliers++;
    }

  return min_inliers;
}
/*
 * Returns log_prob_var - log_prob_repeat for one putative variant, i.e. the
 * log Bayes factor of the "variant" model over the "repeat" model as computed
 * below, and prints intermediate values to stdout along the way.
 *
 * annovar    : supplies len_start, BigThetaUniq and the per-colour coverage
 *              arrays br1_uniq_covg[] / br2_uniq_covg[].
 * model_info : supplies mu, genome_len and ginfo (per-colour total_sequence[]
 *              and mean_read_length[]).
 *
 * Only one of several alternative variant-model formulations is live; the
 * others are kept as commented-out blocks.
 *
 * NOTE(review): the "ignore_" prefix presumably marks this function as
 * experimental/disabled -- confirm before relying on it.
 */
double ignore_get_log_bayesfactor_varmodel_over_repeatmodel(AnnotatedPutativeVariant* annovar, GraphAndModelInfo* model_info)
{
  //uint64_t total_covg_across_colours = get_total_coverage_across_colours(model_info->ginfo, model_info->genome_len);
  double mu = model_info->mu;

  //lambda = c_tot * len /2R
  double len = (double) annovar->len_start;
  //double lambda = ((double) total_covg_across_colours) * len /((double) ());

  // Per-colour rate lambda_s[i] and the sum of per-colour depths.
  long long lambda_s[NUMBER_OF_COLOURS];
  long long total_covg_across_samples=0;
  int i;
  for (i=0; i<NUMBER_OF_COLOURS; i++)
    {
      //lambda_s = c_s * len /2R - c_s is DEPTH of covg (i.e. total_seq/genome_len)
      long long depth = model_info->ginfo->total_sequence[i] / model_info->genome_len;
      int read_len = model_info->ginfo->mean_read_length[i];
      // len is a double, so the right-hand side is evaluated in floating point
      // and truncated when stored into the long long array.
      lambda_s[i]= (depth * len /(2*read_len)) ;
      total_covg_across_samples += depth;
    }

  // Same formula applied to the summed depth and the mean read length across
  // colours; again truncated to an integer on assignment.
  long long total_depth=total_covg_across_samples;
  long long lambda =( total_depth * len /( 2*get_mean_readlen_across_colours(model_info->ginfo) ));
  double alpha = (double) lambda -log(1-mu);

  // Total coverage on each branch. These totals are only referenced by the
  // commented-out terms below; the live code does not read them.
  int theta1_bar = 0;
  int theta2_bar=0;
  for (i=0; i<NUMBER_OF_COLOURS; i++)
    {
      theta1_bar += annovar->br1_uniq_covg[i];
      theta2_bar += annovar->br2_uniq_covg[i];
    }

  // ---- Repeat model: log(mu) + sum_i thet_i * log(lambda_s[i]/alpha) ----
  double log_prob_repeat = log(mu); // + gsl_sf_lnfact(theta1_bar) + gsl_sf_lnfact(theta2_bar)
  // - log(annovar->BigThetaUniq + 1) - log(alpha) ; //(annovar->BigThetaUniq + 1)*log(alpha);

  for (i=0; i<NUMBER_OF_COLOURS; i++)
    {
      int thet = annovar->br1_uniq_covg[i]+ annovar->br2_uniq_covg[i];
      // NOTE(review): lambda_s[i] can be 0 after truncation, which makes the
      // log argument 0 -- confirm callers never hit that case.
      log_prob_repeat += thet * log(lambda_s[i]/alpha) ; //- gsl_sf_lnfact(annovar->br1_uniq_covg[i]) - gsl_sf_lnfact(annovar->br2_uniq_covg[i]);
    }

  printf("\nrepeat component is %f\n", log_prob_repeat);

  double log_prob_var = 0;

  /*
  // using exactly Gils model - covg as poisson, copy num2, plus allelic balance
  log_prob_var = -2*lambda + (annovar->BigThetaUniq) * log(2);
  for (i=0; i<NUMBER_OF_COLOURS; i++)
    {
      int thet = annovar->br1_uniq_covg[i]+ annovar->br2_uniq_covg[i];
      log_prob_var += thet * log(lambda_s[i]) - log_factorial(thet) ;
    }
  double varsum=0;
  double zeta = 0.128;
  double p;
  for (p=0.02; p<=0.98; p+= 0.02)
    {
      double prod=1;
      for (i=0; i<NUMBER_OF_COLOURS; i++)
        {
          double per_sample_sum=0;
          if (annovar->br1_uniq_covg[i]==0)
            {
              per_sample_sum += p*p;
            }
          if (annovar->br2_uniq_covg[i]==0)
            {
              per_sample_sum += (1-p)*(1-p);
            }
          printf("Pow is %f, so add %f to the per sample sum \n", pow(0.5, annovar->br1_uniq_covg[i] + annovar->br2_uniq_covg[i]), 2*p*(1-p)*pow(0.5, annovar->br1_uniq_covg[i] + annovar->br2_uniq_covg[i]));
          per_sample_sum += 2*p*(1-p)*pow(0.5, annovar->br1_uniq_covg[i] + annovar->br2_uniq_covg[i]);
          prod = prod*per_sample_sum;
          printf("Prod becomes %f\n", prod);
        }
      varsum += prod*p/(1-p);
    }
  printf("Varsum is %f\n", varsum);
  log_prob_var += log(varsum*zeta);
  */

  // ---- Variant model (live version) ----
  // Gil's model, but choose Max Likelihood frequency, and condition on that, so not multiplying
  // hundreds of small numbers
  log_prob_var = -2*lambda + (annovar->BigThetaUniq) * log(2);
  for (i=0; i<NUMBER_OF_COLOURS; i++)
    {
      int thet = annovar->br1_uniq_covg[i]+ annovar->br2_uniq_covg[i];
      log_prob_var += thet * log(lambda_s[i]) - log_factorial(thet) ;
    }

  //double varsum=0;
  //double zeta = 0.128;
  double p;
  double max_lik_p=0;
  double current_best=0;
  // Grid search over allele frequency p from 0.02 in steps of 0.02 while
  // p<=0.98, keeping the p whose product over colours of the per-sample
  // genotype sum is largest.
  for (p=0.02; p<=0.98; p+= 0.02)
    {
      double prod=1;
      for (i=0; i<NUMBER_OF_COLOURS; i++)
        {
          double per_sample_sum=0;
          // p*p is added only when branch 1 has no coverage, and (1-p)^2 only
          // when branch 2 has none.
          if (annovar->br1_uniq_covg[i]==0)
            {
              per_sample_sum += p*p;
            }
          if (annovar->br2_uniq_covg[i]==0)
            {
              per_sample_sum += (1-p)*(1-p);
            }
          // The 2p(1-p) term is always added, scaled by 0.5 per unit of
          // coverage on either branch.
          per_sample_sum += 2*p*(1-p)*pow(0.5, annovar->br1_uniq_covg[i] + annovar->br2_uniq_covg[i]);
          prod = prod*per_sample_sum;
          printf("Prod becomes %f\n", prod);
        }
      // Strict '>' keeps the first p that reaches the maximum; if every
      // product is 0, max_lik_p stays at its initial value 0.
      if (prod>current_best)
        {
          current_best=prod;
          max_lik_p=p;
        }
    }

  //then having chosen frequency, apply it
  double prod=1;
  for (i=0; i<NUMBER_OF_COLOURS; i++)
    {
      double per_sample_sum=0;
      if (annovar->br1_uniq_covg[i]==0)
        {
          per_sample_sum += max_lik_p*max_lik_p;
        }
      if (annovar->br2_uniq_covg[i]==0)
        {
          per_sample_sum += (1-max_lik_p)*(1-max_lik_p);
        }
      per_sample_sum += 2*max_lik_p*(1-max_lik_p)*pow(0.5, annovar->br1_uniq_covg[i] + annovar->br2_uniq_covg[i]);
      prod=prod*per_sample_sum;
    }
  log_prob_var += log(prod);

  /*
  //this is the one we like
  double p;
  double prob_var=0;
  double zeta = 0.128; //1 over integral 0.02 to 0.98 of 1/x(1-x)
  for (p=0.02 ; p<=0.98 ; p += 0.02)
    {
      int colour;
      double product = 1;
      for (colour=0; colour<NUMBER_OF_COLOURS; colour++)
        {
          // printf("First %f\n", exp(annovar->gen_log_lh[colour].log_lh[hom_one]) );
          // printf("Second %f\n", exp(annovar->gen_log_lh[colour].log_lh[het]) );
          // printf("Third %f\n", exp(annovar->gen_log_lh[colour].log_lh[hom_other]) );
          product = product * ( p*exp(annovar->gen_log_lh[colour].log_lh[hom_one])/(1-p) + 2*exp(annovar->gen_log_lh[colour].log_lh[het]) + (1-p)*exp(annovar->gen_log_lh[colour].log_lh[hom_other])/p );
        }
      prob_var += product*zeta;
    }
  printf("prob var is %.12f and log of it is %f\n", prob_var, log(prob_var));
  log_prob_var = log(prob_var); // if we want to multiply by permutations: + log_factorial(NUMBER_OF_COLOURS);;
  */

  /*
  double K1=0.375;
  double K2=0.246;
  double K3=0.375;
  double zeta=0.128;
  */

  /*
  double sum=0;
  int colour;
  for (colour=0; colour<NUMBER_OF_COLOURS; colour++)
    {
      local_sum += K1 * exp(annovar->gen_log_lh[colour].log_lh[hom_one]) + K2 * exp(annovar->gen_log_lh[colour].log_lh[het]) + K3 * exp(annovar->gen_log_lh[colour].log_lh[hom_other]);
      sum = sum + log(local_sum);
    }
  */

  /*
  //let's trust the genotyping, since we are assuming the var model. So work out the allele frequency of branch1 and branch2
  int max_lik_num_chroms_with_br1 = 0;
  int num_hets=0;
  for (i=0; i<NUMBER_OF_COLOURS; i++)
    {
      if (annovar->genotype[i]==hom_one)
        {
          max_lik_num_chroms_with_br1 +=2;
        }
      else if (annovar->genotype[i]==het)
        {
          num_hets++;
          max_lik_num_chroms_with_br1 +=1;
        }
    }
  if (max_lik_num_chroms_with_br1>0.98*2*NUMBER_OF_COLOURS)
    {
      max_lik_num_chroms_with_br1=(int) (0.98 * 2*NUMBER_OF_COLOURS);
    }
  double freq_br1 = ((double)max_lik_num_chroms_with_br1)/(2*NUMBER_OF_COLOURS);
  int colour;
  double zeta = 0.128;
  double prob_of_this_allele_freq = zeta/(freq_br1 * (1-freq_br1));
  double prob_var_given_freq=1;
  for (colour=0; colour<NUMBER_OF_COLOURS; colour++)
    {
  */

  /*
      double tmp;
      if (annovar->genotype[colour]==het)
        {
          tmp = 2*freq_br1*(1-freq_br1) ;
        }
      else if (annovar->genotype[colour]==hom_one)
        {
          tmp = freq_br1*freq_br1 ;
        }
      else
        {
          tmp = (1-freq_br1)*(1-freq_br1) ;
        }
      log_prob_var += log(tmp) + annovar->gen_log_lh[colour].log_lh[annovar->genotype[colour]];
  */

  /*
      prob_var_given_freq =prob_var_given_freq* ((2*freq_br1*(1-freq_br1)) * exp(annovar->gen_log_lh[colour].log_lh[het]) + freq_br1*freq_br1 * exp(annovar->gen_log_lh[colour].log_lh[hom_one]) + (1-freq_br1)*(1-freq_br1) * exp(annovar->gen_log_lh[colour].log_lh[hom_other]) ) ;
    }
  //printf("prob_var_given_freq is %f\n", prob_var_given_freq);
  log_prob_var += log(prob_of_this_allele_freq) + log(prob_var_given_freq) ;
  //printf("before Var element is %f\n", log_prob_var);
  //printf("log-factorial NUMBER_OF_COLOURS is %f\n", log_factorial(NUMBER_OF_COLOURS) );
  // log_prob_var += log_factorial(NUMBER_OF_COLOURS) ;
  //printf("1 after Var element is %f\n", log_prob_var);
  //printf("log_factorial(num_hets) is %f\n", log_factorial(num_hets) );
  //log_prob_var -= log_factorial(num_hets);
  //printf("2 after Var element is %f\n", log_prob_var);
  //printf("log_factorial(NUMBER_OF_COLOURS-num_hets) is %f\n", log_factorial(NUMBER_OF_COLOURS-num_hets));
  //log_prob_var -= log_factorial(NUMBER_OF_COLOURS-num_hets);
  //printf("3 after Var element is %f\n", log_prob_var);
  */

  /*
  //be blunt - look for excess hets only
  //just do a count of hets
  int max_lik_num_chroms_with_br1 = 0;
  int num_hets=0;
  for (i=0; i<NUMBER_OF_COLOURS; i++)
    {
      if (annovar->genotype[i]==hom_one)
        {
          max_lik_num_chroms_with_br1 +=2;
        }
      else if (annovar->genotype[i]==het)
        {
          num_hets++;
          max_lik_num_chroms_with_br1 +=1;
        }
    }
  double freq_br1 = ((double)max_lik_num_chroms_with_br1)/(2*NUMBER_OF_COLOURS);
  //binomial distribution for this many hets out of NUMBER_OF_COLOURS individuals, at this given allele frequency (chosen by Max Lik)
  //either use the Max Lik freq only:
  // log_prob_var = log_factorial(NUMBER_OF_COLOURS) - log_factorial(NUMBER_OF_COLOURS-num_hets)
  //    -log_factorial(num_hets)
  //    + num_hets*log(2*freq_br1*(1-freq_br1)) + (NUMBER_OF_COLOURS-num_hets)*log(1 - 2*freq_br1*(1-freq_br1) );
  //or sum over them all
  double zeta=0.128;
  log_prob_var = log_factorial(NUMBER_OF_COLOURS) - log_factorial(NUMBER_OF_COLOURS-num_hets) -log_factorial(num_hets);
  double p;
  for (p=0.02; p<=0.98; p+=0.02)
    {
      log_prob_var += num_hets*log(2*p*(1-p)) + (NUMBER_OF_COLOURS-num_hets)*log(1-2*p*(1-p)) -log(p)-log(1-p) + log(zeta);
    }
  */

  printf("Var bit is %f\n", log_prob_var);

  // Log Bayes factor: variant model over repeat model.
  return log_prob_var - log_prob_repeat;
}
/*
 * Error-allocation stage for one segment of a packet being received.
 *
 * 1. Draws the number of bit errors in the segment from a binomial
 *    distribution with parameters (seg_size, pe), where pe is the packet's
 *    current bit-error rate and seg_size is the number of bits received since
 *    the BER was last calculated.
 * 2. Adds that count to the packet's accumulated error total.
 * 3. For packets that pass the format test below, picks a bit position for
 *    each error and appends it to a per-packet list stored in the
 *    COEXIST_FIELD_LIST_OF_ERRORS transmission-data attribute.
 * 4. Records the actual (allocated) BER for the segment.
 *
 * pkptr : the packet whose transmission-data attributes are read and updated.
 *
 * NOTE(review): the behaviour of the op_* calls, FIN/FOUT and the OPC_*
 * constants is defined by the simulator API, not by this file; the
 * descriptions here follow the original inline comments.
 */
void coexist_bt_error (Packet * pkptr)
	{
	double		pe, r, p_accum, p_exact;
	double		data_rate, elap_time;
	double		log_p1, log_p2, log_arrange;
	int			seg_size, num_errs, prev_num_errs;
	int			invert_errors = OPC_FALSE;

	/* Coexist added */
	List*		error_list;
	double		rx_start_time;
	int			first_bit;
	int			i, j, new_loc;
	int*		error_location;
	int*		prev_error_loc;
	char		pk_type[30];
	Format_Information* format_info;

	/* Declared elsewhere; these declarations give no type, so they rely on	*/
	/* implicit int, which is only accepted by older C dialects.				*/
	extern		Initialize_Formats;
	extern		Format_List;
	extern		Read_Formats();
	extern		Get_Format_Info(char*);

	/** Compute the number of errors assigned to a segment of bits within	**/
	/** a packet based on its length and the bit error probability.		**/
	/* NOTE(review): the label passed to FIN is "coexist_btr_error", which	*/
	/* differs from this function's name -- probably a typo; confirm.		*/
	FIN (coexist_btr_error (pkptr));

	/* Obtain the expected Bit-Error-Rate 'pe' */
	pe = op_td_get_dbl (pkptr, OPC_TDA_RA_BER);

	/* Calculate time elapsed since last BER change */
	elap_time = op_sim_time () - op_td_get_dbl (pkptr, OPC_TDA_RA_SNR_CALC_TIME);

	/* Use datarate to determine how many bits in the segment.				*/
	/* The double product is truncated on assignment to the int seg_size.	*/
	data_rate = op_td_get_dbl (pkptr, OPC_TDA_RA_RX_DRATE);
	seg_size = elap_time * data_rate;

	/* Coexist - Calculate the first bit location of this segment */
	rx_start_time = op_td_get_dbl(pkptr, OPC_TDA_RA_START_RX);
	first_bit = (int) floor((op_td_get_dbl (pkptr, OPC_TDA_RA_SNR_CALC_TIME) - rx_start_time) * data_rate);

	/* Case 1: if the bit error rate is zero, so is the number of errors.	*/
	/* (An empty segment is treated the same way.)							*/
	if (pe == 0.0 || seg_size == 0)
		num_errs = 0;

	/* Case 2: if the bit error rate is 1.0, then all the bits are in error.	*/
	/* (note however, that bit error rates should not normally exceed 0.5).	*/
	else if (pe >= 1.0)
		num_errs = seg_size;

	/* Case 3: The bit error rate is not zero or one.	*/
	else
		{
		/* If the bit error rate is greater than 0.5 and less than 1.0, invert	*/
		/* the problem to find instead the number of bits that are not in error	*/
		/* in order to accelerate the performance of the algorithm. Set a flag	*/
		/* to indicate that the result will then have to be inverted.			*/
		if (pe > 0.5)
			{
			pe = 1.0 - pe;
			invert_errors = OPC_TRUE;
			}

		/* The error count can be obtained by mapping a uniform random number	*/
		/* in [0, 1[ via the inverse of the cumulative mass function (CMF)		*/
		/* for the bit error count distribution.								*/

		/* Obtain a uniform random number in [0, 1[ to represent				*/
		/* the value of the CDF at the outcome that will be produced.			*/
		r = op_dist_uniform (1.0);

		/* Integrate probability mass over possible outcomes until r is exceeded.	*/
		/* The loop iteratively corresponds to "inverting" the CMF since it finds	*/
		/* the bit error count at which the CMF first meets or exceeds the value r.	*/
		/* NOTE(review): if accumulated rounding keeps p_accum below r for every	*/
		/* outcome, the loop ends without a break and num_errs is seg_size + 1.	*/
		for (p_accum = 0.0, num_errs = 0; num_errs <= seg_size; num_errs++)
			{
			/* Compute the probability of exactly 'num_errs' bit errors occurring.	*/

			/* The probability that the first 'num_errs' bits will be in error		*/
			/* is given by pow (pe, num_errs). Here it is obtained in logarithmic	*/
			/* form to avoid underflow for small 'pe' or large 'num_errs'.			*/
			log_p1 = (double) num_errs * log (pe);

			/* Similarly, obtain the probability that the remaining bits will not	*/
			/* be in error. The combination of these two events represents one		*/
			/* possible configuration of bits yielding a total of 'num_errs' errors.*/
			log_p2 = (double) (seg_size - num_errs) * log (1.0 - pe);

			/* Compute the number of arrangements that are possible with the same	*/
			/* number of bits in error as the particular case above. Again obtain	*/
			/* this number in logarithmic form (to avoid overflow in this case).	*/
			/* This result is expressed as the logarithmic form of the formula for	*/
			/* the number N of combinations of k items from n:  N = n!/(n-k)!k!		*/
			log_arrange = log_factorial (seg_size) -
				log_factorial (num_errs) - log_factorial (seg_size - num_errs);

			/* Compute the probability that exactly 'num_errs' are present	*/
			/* in the segment of bits, in any arrangement.					*/
			p_exact = exp (log_arrange + log_p1 + log_p2);

			/* Add this to the probability mass accumulated so far for previously	*/
			/* tested outcomes to obtain the value of the CMF at outcome = num_errs.*/
			p_accum += p_exact;

			/* 'num_errs' is the outcome for this trial if the CMF meets or exceeds	*/
			/* the uniform random value selected earlier.							*/
			if (p_accum >= r)
				break;
			}

		/* If the bit error rate was inverted to compute correct bits instead, then	*/
		/* reinvert the result to obtain the number of bits in error.				*/
		if (invert_errors == OPC_TRUE)
			num_errs = seg_size - num_errs;
		}

	/* Increase number of bit errors in packet transmission data attribute.	*/
	prev_num_errs = op_td_get_int (pkptr, OPC_TDA_RA_NUM_ERRORS);
	op_td_set_int (pkptr, OPC_TDA_RA_NUM_ERRORS, num_errs + prev_num_errs);

	op_pk_format (pkptr, pk_type);

	/* Coexist - If there are errors in this segment allocate their location	*/
	/* NOTE(review): as written, locations are skipped whenever the format	*/
	/* name starts with 'w' OR has 'l' as its second character. If the intent	*/
	/* was to skip only names beginning with "wl", the test is too broad --	*/
	/* confirm.																*/
	if (num_errs > 0 && (pk_type[0] != 'w' && pk_type[1] != 'l'))
		{
		/* Initialize the format list if this is the first invocation */
		if (Initialize_Formats == OPC_TRUE)
			{
			Format_List = Read_Formats();
			Initialize_Formats = OPC_FALSE;
			}

		/* Get the format information										*/
		/* We'll use this info to bail on the first bit found in the		*/
		/* payload of a Bluetooth DH(1|3|5) packet to increase performance	*/
		format_info = Get_Format_Info(pk_type);

		/* There are some errors */
		if (prev_num_errs == 0)
			{
			/* We have not gotten any errors yet so we need to create the list */
			error_list = op_prg_list_create();
			}
		else
			{
			/* We have previous errors so we need to retrieve that list */
			error_list = (List*) op_td_get_ptr(pkptr, OPC_TDA_RA_MAX_INDEX + COEXIST_FIELD_LIST_OF_ERRORS);
			}

		/* Allocate each error */
		for (i = 0; num_errs > i; i++)
			{
			error_location = (int*) op_prg_mem_alloc(sizeof(int));

			/* Set up the new location while loop: keep drawing until the	*/
			/* candidate position passes the duplicate check below.			*/
			new_loc = OPC_FALSE;
			while (new_loc == OPC_FALSE)
				{
				new_loc = OPC_TRUE;

				/* Roll the dice on the location in the segment */
				*error_location = ((int) floor(op_dist_uniform(seg_size))) + first_bit;

				/* Check to see if this is a Bluetooth DH(1|3|5) packet */
				if (format_info->crc == OPC_TRUE && format_info->ecc_type == FEC_NONE)
					{
					/* check to see if the error falls in the payload */
					if (*error_location >= (format_info->preamble_bits + format_info->header_bits))
						{
						/* discontinue further error allocations, since a single	*/
						/* error to the payload of this packet will cause it to fail.	*/
						/* Setting i to num_errs ends the outer loop after this		*/
						/* location is stored, and raising prev_num_errs to the same	*/
						/* value makes the duplicate check below run zero times. The	*/
						/* error count already written to the packet is unchanged,	*/
						/* so fewer locations than counted errors may be stored.		*/
						i = num_errs;
						prev_num_errs = num_errs;
						}
					}

				/* Make sure this error is not in the same place as a previous selection	*/
				/* NOTE(review): j runs from prev_num_errs up to i, where i counts only	*/
				/* this segment's errors from 0. When prev_num_errs >= i the loop body	*/
				/* never runs, so no comparison is made; the upper bound may have been	*/
				/* meant to be prev_num_errs + i -- confirm.								*/
				for (j = prev_num_errs; j < i; j++)
					{
					prev_error_loc = (int*) op_prg_list_access(error_list, j);
					if (*prev_error_loc == *error_location)
						{
						/* This was already determined to be an error location */
						new_loc = OPC_FALSE;
						break; /* j loop */
						}
					} /* j loop */
				} /* while loop */

			/* This location is unique */
			op_prg_list_insert(error_list, error_location, OPC_LISTPOS_TAIL);
			} /* i loop */

		/* Set the list back into the TD attributes */
		op_td_set_ptr(pkptr, OPC_TDA_RA_MAX_INDEX + COEXIST_FIELD_LIST_OF_ERRORS, error_list);
		}

	/* Assign actual (allocated) bit-error rate over tested segment.		*/
	/* For an empty segment the expected rate 'pe' is recorded instead.	*/
	if (seg_size != 0)
		op_td_set_dbl (pkptr, OPC_TDA_RA_ACTUAL_BER, (double) num_errs / seg_size);
	else
		op_td_set_dbl (pkptr, OPC_TDA_RA_ACTUAL_BER, pe);

	FOUT;
	}
/*
 * Returns exp( -mu + x*log(mu) - log_factorial(x) ), which is the Poisson
 * probability of observing x events at rate mu when log_factorial(x) is
 * log(x!).  The exponent is assembled in log space first.
 */
double poisson (int x, double mu)
{
    const double log_prob = -mu + x * log(mu) - log_factorial(x);

    return exp(log_prob);
}
/*
 * Returns log_factorial(n) - log_factorial(k) - log_factorial(n - k), i.e.
 * the log of the binomial coefficient "n choose k" when log_factorial(x) is
 * log(x!).
 */
double log_binomial(double n, double k)
{
    const double log_n_fact = log_factorial(n);
    const double log_k_fact = log_factorial(k);
    const double log_rest_fact = log_factorial(n - k);

    return log_n_fact - log_k_fact - log_rest_fact;
}
/*
 * Reads one integer n from standard input and prints log_factorial(n) with
 * "%f" formatting (no trailing newline).
 *
 * Returns 0 on success. If no integer can be read, prints a message to
 * stderr and returns 1 instead of passing an uninitialised value on.
 */
int main(){
    int n;

    /* scanf reports how many items it converted; anything other than 1
       means n was never assigned. */
    if (scanf("%d", &n) != 1) {
        fprintf(stderr, "error: expected an integer on standard input\n");
        return 1;
    }

    printf("%f", log_factorial(n));
    return 0;
}