Code example #1
word DecodingLayer::initTokensWord(string dictWord) {
    word ret;
    ret.label = dictWord;
    int S = 2*dictWord.size() + 1;

    for(int i = 0; i < S+2; ++i) //S+2 because I need the special indices: -1 (output token) and 0 (input token)
    {
        vector<token> vecTok; //a row of the matrix -- for a specific segment _i_
        for(int j = 0; j < T; ++j)
        {
            token tok;
            tok.score = logZero; //init all with ln 0 = -inf
            vecTok.push_back(tok); //(-inf, empty-set)
        }
        ret.tok.push_back(vecTok); //add the row to the matrix
    }
    //tok(w, s=1, t=1)
    ret.tok[2][0].score = safe_log( y(0, alphabet[' ']) );
    ret.tok[2][0].history.push_back(dictWord);

    //tok(w, 2, 1)
    ret.tok[3][0].score = safe_log( y(0, alphabet[dictWord[0]]) );
    ret.tok[3][0].history.push_back(dictWord);

    if(dictWord.size() == 1) //init tok(w, -1, 1)
        ret.tok[1][0] = ret.tok[3][0];
    else {
        token tok;
        tok.score = logZero;
        ret.tok[1][0] = tok;
    }
    return ret;
}
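Every example on this page calls safe_log, whose definition is not shown here and differs between the projects. A minimal sketch of the common idea, assuming the helper only needs to guard the zero case (the comments above treat log zero as -inf; some projects return a large negative constant instead):

#include <cmath>
#include <limits>

// Hypothetical sketch only, not the definition from any of these projects:
// return ln(x) for positive x, and a "log zero" sentinel otherwise, so that
// a zero probability never raises a domain error.
static double safe_log(double x) {
    if (x <= 0.0)
        return -std::numeric_limits<double>::infinity();
    return std::log(x);
}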
Code example #2
File: caller.cpp  Project: arrogantrobot/vg
// This is Equation 2 (transformed to log) from
// A statistical framework for SNP calling ... , Heng Li, Bioinformatics, 2011
// http://bioinformatics.oxfordjournals.org/content/27/21/2987.full
double Caller::genotype_log_likelihood(const BasePileup& bp,
                                       const vector<pair<int, int> >& base_offsets,
                                       double g, char first, char second) {
    double m = 2.; // always assume two alleles

    double log_likelihood = log(0.25); // 1 / m^2, where m = ploidy = 2;

    const string& bases = bp.bases();
    const string& quals = bp.qualities();
    double perr;

    for (int i = 0; i < base_offsets.size(); ++i) {
        char base = Pileups::extract_match(bp, base_offsets[i].first);
        char qual = base_offsets[i].second >= 0 ? quals[base_offsets[i].second] : _default_quality;
        perr = phred2prob(qual);
        if (base == first) {
            log_likelihood += safe_log((m - g) * perr + g * (1. - perr));
        } else if (base == second) {
            log_likelihood += safe_log((m - g) * (1. - perr) + g * perr);
        } else {
            log_likelihood += safe_log(perr * perr);
        }
    }

    return log_likelihood;
}
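Written out in the code's own notation (m = 2, g the genotype value passed in, e_i = phred2prob(qual_i) for pileup base i), the log-likelihood this function accumulates is:

$$\log L(g) = \log\frac{1}{m^2} + \sum_i
\begin{cases}
\log\big((m-g)\,e_i + g\,(1-e_i)\big) & \text{if base}_i = \text{first}\\
\log\big((m-g)\,(1-e_i) + g\,e_i\big) & \text{if base}_i = \text{second}\\
\log e_i^2 & \text{otherwise}
\end{cases}$$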
Code example #3
File: utils-math.c  Project: tf2/autoclassR
/* LOG_GAMMA
 03nov94 wmt: move into separate file
 19dec94 wmt: return double rather than float
        */
double log_gamma( double x, int low_precision)
{
   if (x > 3.0) {
      if (low_precision == TRUE)
         return (((x - 0.5) * safe_log(x)) +
                 (-1.0 * x) +
                 0.9189385332046727 +                          /* log(sqrt(2*pi)) */
                 (0.08333333333333333 / x) +                   /* (1/12) / x */
                 (-1.0 * (0.002777777777777778 / (x*x*x))));   /* -(1/360 / x^3) */
      else
         return (((x - 0.5) * safe_log(x)) +
                 (-1.0 * x) +
                 0.9189385332046727 +                          /* log(sqrt(2*pi)) */
                 (0.08333333333333333 / x) +                   /* (1/12) / x */
                 (-1.0 * (0.002777777777777778 / (x*x*x))) +   /* -(1/360 / x^3) */
                 (0.00007936507936507937 / (x*x*x*x*x)) +      /* (1/1260 / x^5) */
                 (0.00005952380952380953 / pow( x, 7)));       /* -(1/1680 / x^7) */
   }
   if ((x == 1.0) || (x == 2.0))
      return(0.0);
   if (x > 0.0)
      return(log_gamma( 3.0 + x, low_precision ) -
	     safe_log((double) (x * (1.0 + x) * (2.0 + x))));
   fprintf( stderr, "Attempted to take log_gamma %20.15f\n", x);
   return 0.0; /* not a meaningful value, but we must return something */
}
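The x > 3.0 branch evaluates a truncation of Stirling's series for the log-gamma function (the terms named in the comments), and the 0 < x <= 3.0 branch shifts the argument up by three via the recurrence Γ(x+3) = x(x+1)(x+2)Γ(x):

$$\ln\Gamma(x) \approx \left(x-\tfrac{1}{2}\right)\ln x - x + \ln\sqrt{2\pi} + \frac{1}{12x} - \frac{1}{360x^{3}} + \frac{1}{1260x^{5}} - \frac{1}{1680x^{7}}$$

$$\ln\Gamma(x) = \ln\Gamma(x+3) - \ln\big(x(x+1)(x+2)\big), \qquad 0 < x \le 3$$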
Code example #4
File: estimate.c  Project: Rygbee/ctm-c
void maximization(llna_model* model, llna_ss* ss)
{
    int i, j;
    double sum;

    // mean maximization

    for (i = 0; i < model->k-1; i++)
        vset(model->mu, i, vget(ss->mu_ss, i) / ss->ndata);

    // covariance maximization

    for (i = 0; i < model->k-1; i++)
    {
        for (j = 0; j < model->k-1; j++)
        {
            mset(model->cov, i, j,
                 (1.0 / ss->ndata) *
                 (mget(ss->cov_ss, i, j) +
                  ss->ndata * vget(model->mu, i) * vget(model->mu, j) -
                  vget(ss->mu_ss, i) * vget(model->mu, j) -
                  vget(ss->mu_ss, j) * vget(model->mu, i)));
        }
    }
    if (PARAMS.cov_estimate == SHRINK)
    {
        cov_shrinkage(model->cov, ss->ndata, model->cov);
    }
    matrix_inverse(model->cov, model->inv_cov);
    model->log_det_inv_cov = log_det(model->inv_cov);

    // topic maximization

    for (i = 0; i < model->k; i++)
    {
        sum = 0;
        for (j = 0; j < model->log_beta->size2; j++)
            sum += mget(ss->beta_ss, i, j);

        if (sum == 0) sum = safe_log(sum) * model->log_beta->size2;
        else sum = safe_log(sum);

        for (j = 0; j < model->log_beta->size2; j++)
            mset(model->log_beta, i, j, safe_log(mget(ss->beta_ss, i, j)) - sum);
    }
}
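In matrix notation, with n = ss->ndata, m = ss->mu_ss, S = ss->cov_ss and μ = m / n (which is exactly what the mean-maximization loop sets), the covariance update inside the double loop is

$$\Sigma_{ij} = \frac{1}{n}\big(S_{ij} + n\,\mu_i\mu_j - m_i\,\mu_j - m_j\,\mu_i\big) = \frac{1}{n}S_{ij} - \mu_i\mu_j,$$

i.e. the usual maximum-likelihood covariance estimate, assuming cov_ss and mu_ss accumulate second and first moments over the data.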
Code example #5
File: caller.cpp  Project: arrogantrobot/vg
Caller::Caller(VG* graph,
               double het_prior,
               int min_depth,
               int max_depth,
               int min_support,
               double min_frac,
               double min_likelihood, 
               bool leave_uncalled,
               int default_quality):
    _graph(graph),
    _het_log_prior(safe_log(het_prior)),
    _hom_log_prior(safe_log(.5 * (1. - het_prior))),
    _min_depth(min_depth),
    _max_depth(max_depth),
    _min_support(min_support),
    _min_frac(min_frac),
    _min_log_likelihood(safe_log(min_likelihood)),
    _leave_uncalled(leave_uncalled),
    _default_quality(default_quality) {
    _max_id = _graph->max_node_id();
}
Code example #6
/*
 * returns the index of the element randomly sampled from the log
 * probabilities in vals (length is the number of elements)
 */
int log_sample(double* vals, int length) {
  double normalizer = safe_log(0.0);
  int ii;
  for (ii = 0; ii < length; ++ii) {
    normalizer = log_sum(normalizer, vals[ii]);
  }

  double val = 0, sum = 0, cutoff = (double)rand() / ((double)RAND_MAX + 1.0);
  for (ii = 0; ii < length; ++ii) {
    val = exp(vals[ii] - normalizer);
    sum += val;
    if (sum >= cutoff)
      break;
  }
  assert(ii < length);
  return ii;
}
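A minimal usage sketch (hypothetical values; it assumes log_sample and the project's log_sum/safe_log helpers are already declared and linked):

#include <cstdlib>
#include <cmath>

int demo_log_sample() {
    // Three unnormalized log-weights; log_sample normalizes them internally
    // via log_sum before drawing.
    double logw[3] = { std::log(0.2), std::log(0.5), std::log(0.3) };
    srand(42);                    // seed the rand() call used inside log_sample
    return log_sample(logw, 3);   // 0, 1 or 2, with probability 0.2 / 0.5 / 0.3
}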
Code example #7
int log_vector_sample(std::vector<double> vals, int length) {
  double normalizer = safe_log(0.0);
  int ii = 0;
  assert(length > 0 && length <= (int)vals.size());
  for (ii = 0; ii < length; ++ii) {
    normalizer = log_sum(normalizer, vals[ii]);
  }

  double val = 0, sum = 0, cutoff = (double)rand() / ((double)RAND_MAX + 1.0);
  for (ii = 0; ii < length; ++ii) {
    val = exp(vals[ii] - normalizer);
    sum += val;
    if (sum >= cutoff)
      break;
  }
  assert(ii < length);
  return ii;
}
Code example #8
File: utils.cpp  Project: riteshkasat/hdp
void vct_log(vct* x) {
    size_t size = x->size();
    for (size_t i = 0; i < size; ++i) x->at(i) = safe_log(x->at(i));
}
Code example #9
vector<string> DecodingLayer::getDecodedLabels() {
    int nbWords = words.size();

    for(int t = 1; t < T; ++t)
    {
        token highestOutputToken = getHighestScoreOutputToken(t);

        for(int i = 0; i < nbWords; ++i)
        {
            words[i].tok[0][t] = highestOutputToken;
            words[i].tok[0][t].history.push_back(words[i].label); //add w to tok(w, 0, t) history

            string w_prime = createExtendedLabel(words[i].label);
            int S = w_prime.size();
            for(int s = 0; s < S; ++s)
            {
//                vector<token> P; //not used -- compute maxTok directly
                token maxTok = words[i].tok[s+2][t-1];
//                P.push_back(words[i].tok[s+2][t-1]);
//                P.push_back(words[i].tok[s+1][t-1]);
                int prevSeg = s+1; //the first segment (s == 0) would use prevSeg = 0, but results are better without that special case

                if(words[i].tok[prevSeg][t-1].score > maxTok.score)
                    maxTok = words[i].tok[prevSeg][t-1];

                if(w_prime[s] != ' ' && s >= 2 && w_prime[s-2] != w_prime[s])
                {
//                    P.push_back(words[i].tok[s][t-1]);
                    if(words[i].tok[s][t-1].score > maxTok.score)
                        maxTok = words[i].tok[s][t-1];
                }
                words[i].tok[s+2][t] = maxTok; //highest scoring token from set P
                words[i].tok[s+2][t].score += safe_log( y(t, alphabet[w_prime[s]]) );
            }

            //compute the highest score
            token maxTok = words[i].tok[S + 1][t];
            if(words[i].tok[S][t].score > maxTok.score)
                maxTok = words[i].tok[S][t];
            words[i].tok[1][t] = maxTok;
        }
    }


    //output the 10 best words
    sortVector(words);
    token maxTok = words[0].tok[1][T-1];
    string bestword = words[0].label;

    cout << "+++ ";
    for(int i = 0; i < maxTok.history.size(); ++i)
        cout << maxTok.history[i] <<  " ";
    cout << "++++\n";

    vector<string> result;
    for(int i = 0; i < 10; ++i)
        result.push_back(words[i].label);

    return result;//maxTok.history;
}
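The inner loop over s implements a token-passing recursion over the extended label w' (the word with blanks inserted, built by createExtendedLabel). Each segment keeps the best predecessor token and extends it with the network output for the current frame:

$$\mathrm{tok}(w,s,t) = \max\big\{\mathrm{tok}(w,s,t-1),\ \mathrm{tok}(w,s-1,t-1),\ \mathrm{tok}(w,s-2,t-1)\big\} + \ln y_t(w'_s)$$

where the skip candidate tok(w, s-2, t-1) is only allowed when w'_s is not a blank and differs from w'_{s-2} (the condition tested in the code), and, as the comment notes, the first segment is handled slightly differently. tok(w, 0, t) is re-seeded every frame with the best output token across all words, which is how transitions between words enter the search.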
Code example #10
void vct_log(gsl_vector* v) {
	for (unsigned int i = 0; i < v->size; i++) {
		vset(v, i, safe_log(vget(v, i)));
	}
}
Code example #11
File: viterbi.c  Project: tf2/CNsolidate
void viterbi(int *x_T, int *x_N, double *x_A, double *x_Pi, double *mu, double *sigma,
			 double *obs, int *overlap, double *overlaps, int *overlap_ids, int *no_overlaps,
			 int *start_overlaps, int *dist, int *L, int *distance, double *P, int *Q,
			 double *mean_ref, double *sd_min, double *mean_sd, int *prior,
			 double *x_W_A, double *W_Pi) {
		
		int N = *x_N;
		int T = *x_T;
		double A[N][N];
		double W_A[N][N];
		double Pi[N];
		
		double delta[T][N];
		int psi[T][N];
		
		// Fill A and Pi
		for (int i = 0; i < N; i++) {
				for (int j = 0, index = i; j < N; j++, index += N) {
						if (*dist)
								A[i][j] = x_A[index];
						else
								A[i][j] = safe_log(x_A[index]);
						W_A[i][j] = x_W_A[index];
				}
				Pi[i] = safe_log(x_Pi[i]);
		}
				
		// Initialization
		for (int i = 0; i < N; i++) {
				delta[0][i] = emission_prob(obs[0], mu[i], sigma[i], 1) + Pi[i];
		}
		
		// Recursion
		int no_olaps;
		int start;
		double sum_olap;
		double trans;
		if (T > 1) {
				for (int t = 1; t < T; t++) {
						
						no_olaps = no_overlaps[t];
						int olap_ids[no_olaps];
						double olaps[no_olaps];
						start = start_overlaps[t];
						sum_olap = 1.0;
						
						if (*overlap) {
								for (int i = 0; i < no_olaps; i++) {
										olap_ids[i] = overlap_ids[start + i];
										olaps[i] = overlaps[start + i];
										sum_olap += olaps[i];
								}
								olaps[no_olaps-1] = 1.0;
						}
						
						for (int j = 0; j < N; j++) {
								double prev[N];
								if (*dist)
										prev[0] = safe_log(trans_dist(distance[t], A[0][j],
																	  *L, N)) +
												delta[t-1][0];
								else
										prev[0] = A[0][j] + delta[t-1][0];
								double max = prev[0];
								int maxid = 0;
								if (N > 1) {
										for (int i = 1; i < N; i++) {
												if (*dist)
														prev[i] = safe_log(trans_dist(distance[t],
																					  A[i][j],
																					  *L, N)) +
																delta[t-1][i];
												else
														prev[i] = A[i][j] + delta[t-1][i];
												if (prev[i] > max) {
														maxid = i;
														max = prev[i];
												}
										}
								}
								
								psi[t][j] = maxid;

								trans = 0.0;
								
								if (*overlap) {
										int qt[no_olaps];
										if (no_olaps > 1) {
												int q = j;
												int iter = no_olaps-2;
												for (int i = t-1; i >= olap_ids[0]; i--) {
														q = psi[i+1][q];
														if (member(i, olap_ids, no_olaps)) {
																qt[iter] = q;
																iter--;
														}
												}
										}
										qt[no_olaps-1] = j;
										
										int id;
										for (int i = 0; i < no_olaps; i++) {
												id = qt[i];
												trans += emission_prob(obs[t], mu[id], sigma[id], 1) +
														safe_log(olaps[i] / sum_olap);
										}
								}
								else {
										trans = emission_prob(obs[t], mu[j], sigma[j], 1);
								}

								if (*dist)
										delta[t][j] = delta[t-1][psi[t][j]] +
												safe_log(trans_dist(distance[t], A[psi[t][j]][j], *L, N)) +
												trans;
								else
										delta[t][j] = delta[t-1][psi[t][j]] + A[psi[t][j]][j] + trans;
						}
				}
		}
		
		// Termination
		double max = delta[T-1][0];
		int maxid = 0;
		if (N > 1) {
				for (int i = 1; i < N; i++) {
						if (delta[T-1][i] > max) {
								maxid = i;
								max = delta[T-1][i];
						}
				}
		}
		Q[T-1] = maxid;
		*P = delta[T-1][Q[T-1]];

		// Calculate parameter prior probability
		if (*prior) {
				for (int i = 0; i < N; i++) {
						*P += safe_log(Dirichlet(A[i], W_A[i], N));
						*P += safe_log(*sd_min / sigma[i]) +
								emission_prob(mu[i], mean_ref[i], *mean_sd, 1);
				}
				*P += safe_log(Dirichlet(Pi, W_Pi, N));
		}
		
		// Path backtracking
		if (T > 1) {
				for (int t = T-2; t >= 0; t--) {
						Q[t] = psi[t+1][Q[t+1]];
				}
		}
}
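Ignoring the distance-dependent transition model (*dist) and the overlap correction (*overlap), the core of the recursion above is the standard log-space Viterbi update

$$\delta_t(j) = \max_i\big[\delta_{t-1}(i) + \log a_{ij}\big] + \log b_j(o_t), \qquad \psi_t(j) = \arg\max_i\big[\delta_{t-1}(i) + \log a_{ij}\big],$$

initialized with δ_1(i) = log b_i(o_1) + log π_i, with the most likely state sequence Q recovered by backtracking through ψ in the final loop.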