예제 #1
0
파일: rhmmC.c 프로젝트: B-Rich/tcoffee
double baum_welch_Ntrain (Hmm *H)
{
  int **stab;
  int a, b;
  double cP,bP;

  UNPACK_HMM(H);
  stab=declare_int (nrounds,L+1);
  for (a=0; a<nrounds; a++)
    {
      model2modelR(H);
      cP=baum_welch_train(H, a+1);
      viterbi  (H);
      posterior(H);
      for (b=1; b<L; b++)stab[a][b]=H->VT[b];
      if (a==0 || cP>bP)
	{
	  dump_model    (H);
	  dump_viterbi  (H);
	  dump_posterior(H);
	  bP=cP;
	}
    }
  fprintf (stderr, "---- SUMMARY: Best: %10.3f STABILITY: %.3f\n", bP,analyze_stability (stab,nrounds, L, 100));
  H->P=bP;
  return H->P;
}
예제 #2
0
 typename std::enable_if<is_dataset<Dataset>::value, real_type>::type
 real_type conditional_log(const Dataset& ds) const {
   real_type result(0);
   for (const auto& p : ds.assignments(arguments())) {
     result += posterior(p.first).log(p.first) * p.second;
   }
   return result;
 }
예제 #3
0
파일: naive_bayes.hpp 프로젝트: libgm/libgm
 real_type log(iterator_range<ObsIt> observations,
               iterator_range<LabIt>& labels) const {
   real_type result = 0;
   for (const auto& p : zip(observations, labels)) {
     result += posterior(p.first).log(p.second);
   }
   return result;
 }
예제 #4
0
파일: naive_bayes.hpp 프로젝트: libgm/libgm
 real_type accuracy(iterator_range<ObsIt> observations,
                    iterator_range<LabIt>& labels) const {
   real_type result(0);
   real_type weight(0);
   for (const auto& p : zip(observations, labels)) {
     result += posterior(p.first).arg_max() == p.second;
     weight += real_type(1);
   }
   return result / weight;
 }
예제 #5
0
파일: rhmmC.c 프로젝트: B-Rich/tcoffee
/////////////////////////////////////////////////////////////////////////////
//                                                                         //
//                                                                         //
//                                Decoding                                 //
//                                                                         //
//                                                                         //
/////////////////////////////////////////////////////////////////////////////	  
double decode (Hmm *H)
{
  posterior (H);
  viterbi   (H);
  
  dump_model(H);
  dump_viterbi(H);
  dump_posterior(H);
  return H->PP;
}
예제 #6
0
int main(int argc, char ** argv) {
  MPI_Init(&argc, &argv);

  QUESO::FullEnvironment env(MPI_COMM_WORLD, "", "", NULL);

  QUESO::VectorSpace<QUESO::GslVector, QUESO::GslMatrix> paramSpace(env,
      "space_", 1, NULL);

  QUESO::GslVector minBound(paramSpace.zeroVector());
  minBound[0] = -10.0;

  QUESO::GslVector maxBound(paramSpace.zeroVector());
  maxBound[0] = 10.0;

  QUESO::BoxSubset<QUESO::GslVector, QUESO::GslMatrix> domain("", paramSpace,
      minBound, maxBound);

  QUESO::UniformVectorRV<QUESO::GslVector, QUESO::GslMatrix> prior("", domain);

  Likelihood<QUESO::GslVector, QUESO::GslMatrix> likelihood("", domain);

  QUESO::GenericVectorRV<QUESO::GslVector, QUESO::GslMatrix> posterior("",
      domain);

  QUESO::StatisticalInverseProblem<QUESO::GslVector, QUESO::GslMatrix> ip("",
      NULL, prior, likelihood, posterior);

  QUESO::GslVector initialValues(paramSpace.zeroVector());
  initialValues[0] = 9.0;

  QUESO::GslMatrix proposalCovarianceMatrix(paramSpace.zeroVector());
  proposalCovarianceMatrix(0, 0) = 1.0;

  ip.seedWithMAPEstimator();
  ip.solveWithBayesMetropolisHastings(NULL, initialValues,
      &proposalCovarianceMatrix);

  // The first sample should be the seed
  QUESO::GslVector first_sample(paramSpace.zeroVector());
  posterior.realizer().realization(first_sample);

  // Looser tolerance for the derivative calculated by using a finite
  // difference
  if (std::abs(first_sample[0]) > 1e-5) {
    std::cerr << "seedWithMAPEstimator failed.  Seed was: " << first_sample[0]
              << std::endl;
    std::cerr << "Actual seed should be 0.0" << std::endl;
    queso_error();
  }

  return 0;
}
예제 #7
0
파일: Estep.cpp 프로젝트: jacobxk/mirt
void _Estep(vector<double> &expected, vector<double> &r1vec, const vector<double> &prior,
    const vector<int> &r, const IntegerMatrix &data, const NumericMatrix &itemtrace,
    const int &ncores)
{
    const int nquad = prior.size();
    const int nitems = data.ncol();
    const int npat = r.size();
    #ifdef SUPPORT_OPENMP
    if(nquad * nitems > 1000){
        omp_set_num_threads(ncores);
    } else {
        omp_set_num_threads(1);
    }
    #endif

    #pragma omp parallel for
    for (int pat = 0; pat < npat; ++pat){
        vector<double> posterior(nquad,1.0);
        for(int q = 0; q < nquad; ++q)
            posterior[q] = posterior[q] * prior[q];
        for (int item = 0; item < nitems; ++item)
            if(data(pat,item))
                for(int q = 0; q < nquad; ++q)
                    posterior[q] *= itemtrace(q,item);
        double expd = 0.0;
        for(int i = 0; i < nquad; ++i)
            expd += posterior[i];
        expected[pat] = expd;
        for(int q = 0; q < nquad; ++q)
            posterior[q] = r[pat] * posterior[q] / expd;
       #pragma omp critical
        for (int item = 0; item < nitems; ++item)
            if (data(pat,item))
                for(int q = 0; q < nquad; ++q)
                    r1vec[q + item*nquad] += posterior[q];
    } //end main

}
예제 #8
0
파일: Estep.cpp 프로젝트: jacobxk/mirt
void _Estepbfactor(vector<double> &expected, vector<double> &r1, vector<double> &ri,
    const NumericMatrix &itemtrace, const vector<double> &prior, const vector<double> &Priorbetween, 
    const vector<int> &r, const int &ncores, const IntegerMatrix &data, const IntegerMatrix &sitems,
    const vector<double> &Prior)
{
     #ifdef SUPPORT_OPENMP
    omp_set_num_threads(ncores);
    #endif
    const int sfact = sitems.ncol();
    const int nitems = data.ncol();
    const int npquad = prior.size();
    const int nbquad = Priorbetween.size();
    const int nquad = nbquad * npquad;
    const int npat = r.size();
    vector<double> r1vec(nquad*nitems*sfact, 0.0);

#pragma omp parallel for
    for (int pat = 0; pat < npat; ++pat){
        vector<double> L(nquad), Elk(nbquad*sfact), posterior(nquad*sfact);
        vector<double> likelihoods(nquad*sfact, 1.0);
        for (int fact = 0; fact < sfact; ++fact){
            for (int item = 0; item < nitems; ++item){
                if (data(pat,item) && sitems(item,fact))
                    for (int k = 0; k < nquad; ++k)
                        likelihoods[k + nquad*fact] = likelihoods[k + nquad*fact] * itemtrace(k,item);
            }
        }
        vector<double> Plk(nbquad*sfact);
        for (int fact = 0; fact < sfact; ++fact){
            int k = 0;
            for (int q = 0; q < npquad; ++q){
                for (int i = 0; i < nbquad; ++i){
                    L[k] = likelihoods[k + nquad*fact] * prior[q];
                    ++k;
                }
            }
            vector<double> tempsum(nbquad, 0.0);
            for (int i = 0; i < npquad; ++i)
                for (int q = 0; q < nbquad; ++q)
                    tempsum[q] += L[q + i*nbquad];
            for (int i = 0; i < nbquad; ++i)
                Plk[i + fact*nbquad] = tempsum[i];
        }
        vector<double> Pls(nbquad, 1.0);
        for (int i = 0; i < nbquad; ++i){
            for(int fact = 0; fact < sfact; ++fact)
                Pls[i] = Pls[i] * Plk[i + fact*nbquad];
            expected[pat] += Pls[i] * Priorbetween[i];
        }
        for (int fact = 0; fact < sfact; ++fact)
            for (int i = 0; i < nbquad; ++i)
                Elk[i + fact*nbquad] = Pls[i] / Plk[i + fact*nbquad];
        for (int fact = 0; fact < sfact; ++fact)
            for (int i = 0; i < nquad; ++i)
                posterior[i + nquad*fact] = likelihoods[i + nquad*fact] * r[pat] * Elk[i % nbquad + fact*nbquad] /
                                            expected[pat];
        #pragma omp critical
        for (int i = 0; i < nbquad; ++i)
            ri[i] += Pls[i] * r[pat] * Priorbetween[i] / expected[pat];
        for (int item = 0; item < nitems; ++item)
            if (data(pat,item))
                for (int fact = 0; fact < sfact; ++fact)
                    for(int q = 0; q < nquad; ++q)
                        r1vec[q + fact*nquad*nitems + nquad*item] += posterior[q + fact*nquad];
    }   //end main
    for (int item = 0; item < nitems; ++item)
        for (int fact = 0; fact < sfact; ++fact)
            if(sitems(item, fact))
                for(int q = 0; q < nquad; ++q)
                    r1[q + nquad*item] = r1vec[q + nquad*item + nquad*nitems*fact] * Prior[q];
}
예제 #9
0
/**
 * Create an FST based on an RNN
 */
void FlatBOFstBuilder::convertRNN(CRnnLM & rnnlm, VectorFst<LogArc> &fst) {
	queue<NeuronFstHistory> q;
	VectorFst<LogArc> new_fst;
	
	NeuronFstHistory fsth(rnnlm.getHiddenLayerSize(),getNumBins());
	FstIndex id = 0;
	
	NeuronFstHistory new_fsth(rnnlm.getHiddenLayerSize(),getNumBins());
	FstIndex new_id;

	NeuronFstHistory min_backoff(rnnlm.getHiddenLayerSize(),getNumBins());
	set<NeuronFstHistory>set_min_backoff;
	
	NeuronFstHistory bo_fsth(rnnlm.getHiddenLayerSize(),getNumBins());
	bool backoff = false;
	vector<FstIndex> deleted;


	real p = 0.00;
	real p_joint = 0.00;
	real entropy = 0.0;
	real delta = 0.0;
	vector<real> all_prob(rnnlm.getVocabSize());
 	vector<real> posterior(10);
	
	map< FstIndex,set<FstIndex> > pred;
	vector<bool> non_bo_pred(rnnlm.getVocabSize());
	vector<int> to_be_added;
	vector<int> to_be_removed;
	for (int i = 0; i < rnnlm.getVocabSize(); i++) {
		to_be_removed.push_back(i);
	}
	vector<real> to_be_added_prob;


 	FstIndex n_added = 0;
 	FstIndex n_processed = 0;
 	FstIndex next_n_added = 0;
 	FstIndex next_n_processed = 0;
 	FstIndex n_backoff = 0;
 	FstIndex n_only_backoff = 0;
 	
	int v = rnnlm.getVocabSize();
	int w = 0;


	// Initialize
	rnnlm.copyHiddenLayerToInput();
//	printNeurons(rnnlm.getInputLayer(),0,10);

	// Initial state ( 0 | hidden layer after </s>)
	printNeurons(rnnlm.getHiddenLayer(),0,10);
	fsth.setFstHistory(rnnlm, *dzer);
	fsth.setLastWord(0);
	q.push(fsth);
	addFstState(id, new NeuronFstHistory(fsth), fst);
	fst.SetStart(INIT_STATE);
	
	// Final state (don't care about the associated discrete representation)
	fst.AddState();
	fst.SetFinal(FINAL_STATE, LogWeight::One());
	
 	/*posterior.at(INIT_STATE) = MY_LOG_ONE;*/
	min_backoff.setLastWord(-1);
	computeEntropyAndConditionals(entropy, all_prob, rnnlm, min_backoff);
	min_backoff = getBackoff(rnnlm, min_backoff, set_min_backoff, all_prob, to_be_removed);
	cout << "MIN BACKOFF " << min_backoff.toString() << endl;
	set_min_backoff.insert(min_backoff);
	
//	addFstState(id, min_backoff, fst);
//	q.push(min_backoff);
	

	
	// Estimate number of backoff loop to bound the backoff path length
// 	float ratioa = 0.0;
// 	float ratiob = 0.0;
	float ratio = 0.0;
// 	for (int i=0; i < min_backoff.getNumDims(); i++) {
// 		if (min_backoff.getDim(i) == 1) {
// 			ratioa++;
// 		}
// 		if (fsth.getDim(i) == 1) {
// 			ratiob++;
// 		}
// 	}
// 	ratioa /= min_backoff.getNumDims();
// 	ratiob /= min_backoff.getNumDims();
// 	ratio = (ratioa*(1.0-ratiob))+(ratiob*(1.0-ratioa));
	ratio=1.0;

//	printf("ratio=%f\t%i BO loops\n", ratio, n_bo_loops);
	
	
	
	//foreach state in the queue
	while (!q.empty()) {
		fsth = q.front();
		q.pop();
		id = h2state[&fsth];
		state2h.push_back(new NeuronFstHistory(fsth));
		if (id == FINAL_STATE) { continue; }


		
		
	dprintf(1,"-- STUDY STATE %li = %s\n", id, fsth.toString().c_str());
	

/*		try { posterior.at(id) = MY_LOG_ONE; }
		catch (exception e) {
			posterior.resize((int) (posterior.size()*1.5)+1);
			posterior.at(id) = MY_LOG_ONE;
		}*/
		
		computeEntropyAndConditionals(entropy, all_prob, rnnlm, fsth);
		
		//compute BO in advance and check if it is a min BO node
		bo_fsth = getBackoff(rnnlm, fsth, set_min_backoff, all_prob, to_be_removed);
		if (bo_fsth == fsth) { bo_fsth = min_backoff; }
			
		//foreach w (ie, foreach word of each class c)
		//test if the edge has to kept or removed
		backoff = false; //no backoff yet since no edge has been removed
		for (w=0; w < rnnlm.getVocabSize(); w++) {
				p = all_prob[w];
				
				/*p_joint = exp(-posterior[id]-p);*/
				p_joint = exp(-p);
				delta = -1.0*p_joint*log2(p_joint);
				
				//accept edge if this leads to a minimum
				//relative gain of the entropy

				dprintf(2,"P = %e \tP_joint = %e \tH = %e \tDelta =%e \tDelta H = %.6f %%\n",exp(-p), p_joint, entropy, delta, 100.0*delta/entropy);

				if (set_min_backoff.find(fsth) != set_min_backoff.end() || (delta > pruning_threshold*entropy)) {
//				if ((fsth == min_backoff) || (delta > pruning_threshold*entropy)) {
					next_n_added++;
					to_be_added.push_back(w);
					to_be_added_prob.push_back(p);
					dprintf(2,"\tACCEPT [%li] -- %i (%s) / %f --> ...\t(%e > %e)\n", id, w, rnnlm.getWordString(w), p, delta, pruning_threshold*entropy);
//					to_be_removed.push_back(w);
 				}
 				//backoff
				else {
//					to_be_removed.push_back(w);
					backoff = true;
					dprintf(2,"\tPRUNE [%li] -- %i / %f --> ...\n", id, w, p);
 				}
 				
 				//print
				if (next_n_processed % 100000 == 0) {
						fprintf(stderr, "\rH=%.5f / N proc'd=%li / N added=%li (%.5f %%) / N bo=%li (%.5f %%) / %li/%li Nodes (%2.1f %%) / N min BO=%i", entropy, n_processed, n_added, ((float) n_added/ (float)n_processed)*100.0, n_backoff, ((float) n_backoff/ (float)n_added)*100.0, id, id+q.size(), 100.0 - (float) (100.0*id/(id+q.size())), (int) set_min_backoff.size());
				}
				next_n_processed++;
 				
//			}
		}


		//Set a part of the new FST history
		new_fsth.setFstHistory(rnnlm, *dzer);

		//if at least one word is backing off
		if (backoff) {
			
			n_backoff++;
			if (to_be_added.size() == 0) {
				n_only_backoff++;
			}
			
			
			if (addFstState(new_id, new NeuronFstHistory(bo_fsth), fst)) {
				q.push(bo_fsth);
				try { non_bo_pred.at(new_id) = false; }
				catch (exception e) {
					non_bo_pred.resize(new_id+(int) (non_bo_pred.size()*0.5)+1);
					non_bo_pred.at(new_id) = false;
				}
				
			}
			dprintf(1,"BACKOFF\t[%li]\t(%s)\n-------\t[%li]\t(%s)\n", id, fsth.toString().c_str(), new_id, bo_fsth.toString().c_str());

			fst.AddArc(id, LogArc(EPSILON, EPSILON, LogWeight::Zero(), new_id));
			
			addPred(pred, new_id, id);
			
		}
		
		
		vector<real>::iterator it_p = to_be_added_prob.begin();
		for (vector<int>::iterator it = to_be_added.begin(); it != to_be_added.end(); ++it) {
			w = *it;
			p = *it_p;

			if (w == 0) {
				fst.AddArc(id, LogArc(FstWord(w),FstWord(w),p,FINAL_STATE));
				dprintf(1,"EDGE [%li] (%s)\n---- %i (%s) / %f -->\n---- [%li] FINAL STATE)\n\n", id, fsth.toString().c_str(), FstWord(w), rnnlm.getWordString(w), p, FINAL_STATE);				
			}
		
			//accept edge
			else {
				new_fsth.setLastWord(w);
	
				//if sw not in the memory
				//then add a new state for sw in the FST and push sw in the queue
				if (addFstState(new_id, new NeuronFstHistory(new_fsth), fst)) {
					q.push(new_fsth);
					try { non_bo_pred.at(new_id) = true; }
					catch (exception e) {
						non_bo_pred.resize(new_id+(int) (non_bo_pred.size()*0.5)+1);
						non_bo_pred.at(new_id) = true;
					}
				}
				else { /* already exists */ }
			
				//add the edge in the FST
				non_bo_pred.at(new_id) = true;
				fst.AddArc(id, LogArc(FstWord(w),FstWord(w),p,new_id));
				dprintf(1,"EDGE [%li] (%s)\n---- %i (%s) / %f -->\n---- [%li] (%s)\n\n", id, fsth.toString().c_str(), FstWord(w), rnnlm.getWordString(w), p, new_id, new_fsth.toString().c_str());				

//				posterior.at(new_id) += posterior[id]*p;

			}
			
			/*if (posterior[id]+p < LogWeight::Zero().Value()) {
				p_joint = exp(-posterior[id]-p);
				entropy -= p_joint*log2(p_joint);
			}*/
			
			++it_p;
		}
		
		n_added = next_n_added;
		n_processed = next_n_processed;
		
		//reset queues
		to_be_added.clear();
		to_be_added_prob.clear();
//		to_be_removed.clear();
		
	}

	cout << endl;
	
	//compute backoff weights
	deleted = compactBackoffNodes(fst, pred, non_bo_pred);
	computeAllBackoff(fst, pred);


	//remove useless nodes
	removeStates(fst, new_fst, deleted);
	fst.DeleteStates();
	fst = new_fst;
	
	//Fill the table of symbols
	SymbolTable dic("dictionnary");
	dic.AddSymbol("*", 0);
	for (int i=0; i<rnnlm.getVocabSize(); i++) {
		dic.AddSymbol(string(rnnlm.getWordString(i)), i+1);
	}
	fst.SetInputSymbols(&dic);
	fst.SetOutputSymbols(&dic);

						//printf("H=%.5f / N proc'd=%li / N added=%li (%.5f %%) %li/%li Nodes (%2.1f %%)\n", entropy, n_processed, n_added, ((float) n_added/ (float)n_processed)*100.0, id, id+q.size(), 100.0 - (float) (100.0*id/(id+q.size())));
	cout << "END" << endl;
	
}
예제 #10
0
파일: match.c 프로젝트: feltstykket/scythe
match *find_best_match(const adapter_array *aa, const char *read,  
                       float *p_quals, float prior, float p_match, int min_l) {
  /* 
   Take an adapter array, and check the read against all
   adapters. Brute force string matching is used. This is to avoid
   approximate matching algorithms which required an a priori
   specified number mismatches.

  */
  
  match *best_match=NULL;
  int i, shift, max_shift, found_contam=0;
  int *best_arr=NULL, best_adapter=0, best_length=0, best_shift=0, best_score=INT_MIN;
  int al, curr_score, *curr_arr=NULL;
  int rl = strlen(read);
  posterior_set *ps=NULL;
  float *best_p_quals=NULL;

  max_shift = rl - min_l;
  for (shift = 0; shift < max_shift; shift++) {
    for (i = 0; i < aa->n; i++) {
      if (min_l >= aa->adapters[i].length) {
        fprintf(stderr, "Minimum match length (option -n) greater than or " \
                "equal to length of adapter.\n");
        exit(EXIT_FAILURE);
      }
      al = min(aa->adapters[i].length, strlen(&(read)[shift]));
      curr_arr = score_sequence(&(read)[shift], (aa->adapters[i]).seq, al);
      curr_score = sum(curr_arr, al);
      if (curr_score > best_score) {
        best_score = curr_score; 
        best_length = al;
        best_shift = shift;
        best_p_quals = &(p_quals)[shift];
        best_arr = curr_arr;
        best_adapter = i;
        ps = posterior(best_arr, best_p_quals, prior, 0.25, best_length);
        found_contam = ps->is_contam;
        if (found_contam) {
          break;
        } else {
          free(ps); 
          ps=NULL;
          free(best_arr);
        }
      } else free(curr_arr);
    }
    if (found_contam)
      break;
  }
  
  if (!found_contam) /* no match found */
    return NULL;
  
  /* save this match */
  best_match = xmalloc(sizeof(match));
  best_match->match = best_arr;
  best_match->shift = best_shift;
  best_match->length = best_length;
  best_match->ps = ps;
  best_match->score = best_score;
  best_match->adapter_index = best_adapter;
  best_match->p_quals = best_p_quals;
  best_match->match_pos = calloc(best_length, sizeof(int));
  for (i = 0; i < best_length; i++)
    best_match->match_pos[i] = best_match->match[i] == MATCH_SCORE;
  return best_match;
}