void NaiveBayesGibbs::updateTheta(int round)
{
    std::mt19937 eng(int(time(0)));
    //#pragma omp parallel for
    for (int c=0; c<numCategories_; c++)
    {
        std::vector<double> y;
        double sum = 0.0;

        HashMap::iterator iter = thetaCur_.begin();
        for (; iter!=thetaCur_.end(); iter++)
        {
            std::gamma_distribution<double> gamma(wordCount_[iter->first][c] + DIRICHLET_HYPERPARAMETER);
            double tmp = log(1.0 * gamma(eng));
            y.push_back(tmp);
            sum = logsumexp(sum, tmp, iter==thetaCur_.begin());
        }

        int num=0;
        for (iter=thetaCur_.begin(); iter!=thetaCur_.end(); iter++)
        {
            double value = y[num]-sum;
            iter->second[c] = value;
//            thetaHistory_[iter->first][c] += value;
            double tmp = thetaHistory_[iter->first][c];
            if (round % INTERVAL == 0)
                thetaHistory_[iter->first][c] = logsumexp(tmp, value, tmp==0);
            num++;
        }
    }
}
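Several of these snippets (this one, the CRF-style Node::calcAlpha / Node::calcBeta examples, and the inside-outside code further down) call a three-argument logsumexp(x, y, flg) that is never listed. A minimal sketch of that helper, in the spirit of the CRF++ idiom; the MINUS_LOG_EPSILON cutoff and the exact signature are assumptions, not taken from any of the projects shown here:

#include <algorithm>
#include <cmath>

// Stable log(exp(x) + exp(y)). When flg is true, x is treated as uninitialized
// and y is returned unchanged, which is how the snippets seed their accumulators
// on the first term of a sum.
inline double logsumexp(double x, double y, bool flg) {
  const double MINUS_LOG_EPSILON = 50.0;  // assumed cutoff; beyond it exp() underflows to 0
  if (flg) return y;
  const double vmin = std::min(x, y);
  const double vmax = std::max(x, y);
  if (vmax > vmin + MINUS_LOG_EPSILON) {
    return vmax;  // the smaller term is negligible
  }
  return vmax + std::log(std::exp(vmin - vmax) + 1.0);
}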
Example #2
Real gmm_objective(std::vector<VectorD> const& x,
  Vector const& alphas, std::vector<VectorD> const& means, std::vector<VectorD> const& qs, std::vector<Vector> const& ls,
  Real wishart_gamma, Real wishart_m)
{
  size_t n = x.size();
  size_t d = x[0].size();
  size_t K = alphas.size();
  //    assert (K = rows means)
  //    assert (d = cols means)
  //    assert (K = rows qs)
  //    assert (d = cols qs)
  //    assert (K = rows ls)
  //    assert (auto di = (cardToInt d) in di*(di-1)/2 = cardToInt (cols ls))
  Real out = 0.0;
  Vector tmp{ K };
  for (size_t i = 0; i < n; ++i) {
    for (size_t k = 0; k < K; ++k)
      tmp[k] = alphas[k] + sum(qs[k]) - 0.5 * sumsq(Qtimesv(qs[k], ls[k], x[i] - means[k]));
    out += logsumexp(tmp);
  }
  out -= n * logsumexp(alphas);
  for (size_t k = 0; k < K; ++k)
    out += sumsq(exp(qs[k])) + sumsq(ls[k]);
  return out;
}
Example #3
void CapR::CalcLogSumBulgeAndInternalProbability(vector<double> &bulge_probability, vector<double> &internal_probability){
  double probability = 0;
  double temp = 0;
  int type = 0;
  int type2 = 0;
  vector<bool> b_flag_array; b_flag_array.resize(_seq_length,0);
  vector<bool> i_flag_array; i_flag_array.resize(_seq_length,0);
  
  for(int i = 1; i<_seq_length-TURN-2;i++){
    for(int j = i+TURN+3; j<=min(i+_maximal_span,_seq_length);j++){
      type = BP_pair[_int_sequence[i]][_int_sequence[j]];
      if (type!=0) {
	for (int p =i+1; p <= min(i+MAXLOOP+1,j-TURN-2); p++) {
	  int u1 = p-i-1;
	  for (int q=max(p+TURN+1,j-MAXLOOP+u1-1); q<j; q++) {
	    type2 = BP_pair[_int_sequence[p]][_int_sequence[q]];
	    if (type2 != 0 && !(p == i+1 && q == j-1)) {
	      type2 = rtype[type2];
	      if(_Beta_stemend[i][j-i-1] != -INF && _Alpha_stem[p-1][q-p+1] != -INF){
		temp = _Beta_stemend[i][j-i-1] + LoopEnergy(type, type2,i,j,p,q)+_Alpha_stem[p-1][q-p+1];
		
		for(int k = i+1; k <= p-1;k++){
		  if(j == q+1){
		    bulge_probability[k-1] = b_flag_array[k-1] == 1 ? logsumexp(bulge_probability[k-1], temp) : temp;
		    b_flag_array[k-1] = 1;		   
		  }else{
		    internal_probability[k-1] = i_flag_array[k-1] == 1 ? logsumexp(internal_probability[k-1], temp) : temp;
		    i_flag_array[k-1] = 1;		  
		  }
		}
		
		for(int k = q+1; k <= j-1;k++){
		  if(i == p-1){
		    bulge_probability[k-1] = b_flag_array[k-1] == 1 ? logsumexp(bulge_probability[k-1], temp) : temp;
		    b_flag_array[k-1] = 1;
		  }else{
		    internal_probability[k-1] = i_flag_array[k-1] == 1 ? logsumexp(internal_probability[k-1], temp) : temp;
		    i_flag_array[k-1] = 1;
		  }
		}
	      } 
	    }
	  }
	}
      }
    }
  }
  
  for(int i=0;i<_seq_length;i++){
    if(b_flag_array[i]==1){
      bulge_probability[i] = exp(bulge_probability[i]-_Alpha_outer[_seq_length]);
    }
    if(i_flag_array[i]==1){
      internal_probability[i] = exp(internal_probability[i]-_Alpha_outer[_seq_length]);
    }
  }
}
Example #4
void gmm_objective(int d, int k, int n,
  double* alphas, 
  double* means,
  double* icf, 
  double *x,
  Wishart wishart, 
  double *err)
{
  int ik, ix, icf_sz;
  double *main_term, *sum_qs, *Qdiags, *xcentered, *Qxcentered;
  double slse, lse_alphas, CONSTANT;
  CONSTANT = -n*d*0.5*log(2 * PI);
  icf_sz = d*(d + 1) / 2;

  main_term = (double *)malloc(k*sizeof(double));
  sum_qs = (double *)malloc(k*sizeof(double));
  Qdiags = (double *)malloc(d*k*sizeof(double));
  xcentered = (double *)malloc(d*sizeof(double));
  Qxcentered = (double *)malloc(d*sizeof(double));

  preprocess_qs(d, k, icf, sum_qs, Qdiags);

  slse = 0.;
  for (ix = 0; ix < n; ix++)
  {
    for (ik = 0; ik < k; ik++)
    {
      subtract(d, &x[ix*d], &means[ik*d], xcentered);
      Qtimesx(d, &Qdiags[ik*d], &icf[ik*icf_sz + d], xcentered, Qxcentered);

      main_term[ik] = alphas[ik] + sum_qs[ik] - 0.5*sqnorm(d, Qxcentered);
    }
    slse = slse + logsumexp(k, main_term);
  }
  free(main_term);
  free(xcentered);
  free(Qxcentered);

  lse_alphas = logsumexp(k, alphas);
  *err = CONSTANT + slse - n*lse_alphas;

  *err = *err + log_wishart_prior(d, k, wishart, sum_qs, Qdiags, icf);

  free(sum_qs);
  free(Qdiags);

  // this is here so that tapenade would recognize that means and inv_cov_factors are variables
  *err = *err + ((means[0] - means[0]) +
    (icf[0] - icf[0]));
}
Example #5
void CapR::CalcHairpinProbability(vector<double> &hairpin_probability){
  for(int x = 1; x <=_seq_length;x++){
    double temp = 0.0;
    int type = 0;
    bool flag = 0;
    double h_energy = 0.0;
    
    for(int i = max(1,x-_maximal_span);i<x ;i++){
      for(int j = x+1; j<=min(i+_maximal_span,_seq_length);j++){
	type = BP_pair[_int_sequence[i]][_int_sequence[j]];
	if(_Beta_stemend[i][j-i-1] != -INF){
	  h_energy = _Beta_stemend[i][j-i-1] + HairpinEnergy(type, i,j);
	  temp = flag == 1 ? logsumexp(temp, h_energy) : h_energy;
	  flag = 1;
	}
      }
    }

    if(flag == 1){
      hairpin_probability[x-1] = exp(temp-_Alpha_outer[_seq_length]);
    }else{
      hairpin_probability[x-1] = 0.0;
    }
  }
}
Example #6
void gmm_objective_split_other(int d, int k, int n,
  double* alphas,
  double* icf,
  Wishart wishart,
  double *err)
{
  double *sum_qs, *Qdiags;
  double lse_alphas, CONSTANT;
  CONSTANT = -n*d*0.5*log(2 * PI);

  sum_qs = (double *)malloc(k*sizeof(double));
  Qdiags = (double *)malloc(d*k*sizeof(double));

  preprocess_qs(d, k, icf, sum_qs, Qdiags);

  lse_alphas = logsumexp(k, alphas);
  *err = CONSTANT - n*lse_alphas;

  *err = *err + log_wishart_prior(d, k, wishart, sum_qs, Qdiags, icf);

  free(sum_qs);
  free(Qdiags);

  // this is here so that tapenade would recognize that means and inv_cov_factors are variables
  *err = *err + ((alphas[0] - alphas[0]) + (icf[0] - icf[0]));
}
Example #7
void Node::calcBetaWithConstraints() {
  beta = 0.0;
  for (const_Path_iterator it = rpath.begin(); it != rpath.end(); ++it) {
    beta = logsumexp(beta,
                     (*it)->cost + (*it)->constraintcost + (*it)->rnode->beta +
                         (*it)->rnode->cost + (*it)->rnode->constraintcost,
                     (it == rpath.begin()));
  }
}
Example #8
void Node::calcAlphaWithConstraints() {
  alpha = 0.0;
  for (const_Path_iterator it = lpath.begin(); it != lpath.end(); ++it) {
    alpha = logsumexp(alpha,
                      (*it)->cost + (*it)->constraintcost + (*it)->lnode->alpha,
                      (it == lpath.begin()));
  }
  alpha += cost + constraintcost;  // node cost plus constraint cost
}
Example #9
void Node::calcAlpha() {
  alpha = 0.0;
  for (const_Path_iterator it = lpath.begin(); it != lpath.end(); ++it) {
    alpha = logsumexp(alpha,
                      (*it)->cost + (*it)->lnode->alpha,
                      (it == lpath.begin()));
  }
  alpha += cost;
}
Example #10
void Node::calcBeta() {
  beta = 0.0;
  for (const_Path_iterator it = rpath.begin(); it != rpath.end(); ++it) {
    beta = logsumexp(beta,
                     (*it)->cost + (*it)->rnode->beta,
                     (it == rpath.begin()));
  }
  beta += cost;
}
Example #11
double CapR::CalcMultiProbability(int x){
  double probability = 0.0;
  double temp = 0.0;
  bool flag = 0;
  
  for(int i = x; i<=min(x+_maximal_span,_seq_length);i++){
    if(_Beta_multi[x-1][i-x+1] != -INF && _Alpha_multi[x][i-x] != -INF){
      temp = flag == 0 ? _Beta_multi[x-1][i-x+1] + _Alpha_multi[x][i-x] : logsumexp(temp,_Beta_multi[x-1][i-x+1] + _Alpha_multi[x][i-x]);
      flag = 1;
    }
  }
  
  for(int i = max(0,x-_maximal_span); i<x;i++){
    if(_Beta_multi2[i][x-i] != -INF && _Alpha_multi2[i][x-i-1] != -INF){
      temp = flag == 0 ? _Beta_multi2[i][x-i] + _Alpha_multi2[i][x-i-1] : logsumexp(temp,_Beta_multi2[i][x-i] + _Alpha_multi2[i][x-i-1]);
      flag = 1;
    }
  }
  if(flag == 1){ probability = exp(temp-_Alpha_outer[_seq_length]); }
  return(probability);
}
Example #12
inline double logsumexp(std::vector<double>::const_iterator b,
                        std::vector<double>::const_iterator e) {
  if (b != e) {
    double z = 0;
    for (auto it = b; it != e; ++it) {
      z = logsumexp(z, *it, it == b);
    }
    return z;
  } else {
    return 1;
  }
}
Example #13
  void TaggerImpl::forwardbackward() {
    if (x_.empty()) return;

    for (int i = 0; i < static_cast<int>(x_.size()); ++i)
      for (size_t j = 0; j < ysize_; ++j)
        node_[i][j]->calcAlpha();

    for (int i = static_cast<int>(x_.size() - 1); i >= 0;  --i)
      for (size_t j = 0; j < ysize_; ++j)
        node_[i][j]->calcBeta();

    Z_ = 0.0;
    for (size_t j = 0; j < ysize_; ++j)
      Z_ = logsumexp(Z_, node_[0][j]->beta, j == 0);

    return;
  }
Example #14
	void Tagger::forwardbackward()
	{
		if (empty()) return;
		size_t tagsNum = featureIndexPtr->getSizeOfTags();
		size_t nodesize = getSizeOfNodes();
		for (size_t eachNodeIdx = 0; eachNodeIdx < nodesize; ++eachNodeIdx)
			for (size_t eachTagIdx = 0; eachTagIdx < tagsNum; ++eachTagIdx)
				getNode(eachNodeIdx,eachTagIdx)->calcAlpha();

		for (int eachNodeIdx = nodesize-1; eachNodeIdx >= 0; --eachNodeIdx)
			for (size_t eachTagIdx = 0; eachTagIdx < tagsNum; ++eachTagIdx)	
				getNode(eachNodeIdx, eachTagIdx)->calcBeta();

		Z_ = 0.0;
		for (size_t eachTagIdx = 0; eachTagIdx < tagsNum; ++eachTagIdx)
			Z_ = logsumexp(Z_, getNode(0, eachTagIdx)->beta, eachTagIdx == 0);
	
		return;
	}
Example #15
double logisticloss::evaluate(LocalDenseMatrixType& O, LocalTargetMatrixType& T) {
        double obj = 0.0;
        int m = O.Width();
        int n = O.Height();
        double t;
        int i;
        //double start = omp_get_wtime( );

#ifdef SKYLARK_HAVE_OPENMP
        #pragma omp parallel for reduction(+:obj) private(i,t)
#endif
        for(int i=0;i<m;i++) {
        	t = (int) T.Get(i, 0);
            obj += -O.Get(t, i) + logsumexp(O.Buffer(0, i), n);
        }

        //double end = omp_get_wtime( );

        // std::cout << end - start <<  " secs " << std::endl;

        return obj;
}
Example #16
void gmm_objective_split_inner(int d, int k,
  double* alphas,
  double* means,
  double* icf,
  double *x,
  double *err)
{
  int ik, ix, icf_sz;
  double *main_term, *sum_qs, *Qdiags, *xcentered, *Qxcentered;
  icf_sz = d*(d + 1) / 2;

  main_term = (double *)malloc(k*sizeof(double));
  sum_qs = (double *)malloc(k*sizeof(double));
  Qdiags = (double *)malloc(d*k*sizeof(double));
  xcentered = (double *)malloc(d*sizeof(double));
  Qxcentered = (double *)malloc(d*sizeof(double));

  preprocess_qs(d, k, icf, sum_qs, Qdiags);

  for (ik = 0; ik < k; ik++)
  {
    subtract(d, x, &means[ik*d], xcentered);
    Qtimesx(d, &Qdiags[ik*d], &icf[ik*icf_sz + d], xcentered, Qxcentered);
    
    main_term[ik] = alphas[ik] + sum_qs[ik] - 0.5*sqnorm(d, Qxcentered);
  }
  *err = logsumexp(k, main_term);
  free(main_term);
  free(xcentered);
  free(Qxcentered);
  free(sum_qs);
  free(Qdiags);

  // this is here so that tapenade would recognize that means and inv_cov_factors are variables
  *err = *err + ((means[0] - means[0]) +
    (icf[0] - icf[0]));
}
Example #17
double logsumexp(double a, double b)
{
	double x[2] = {a,b};
	return logsumexp(x, 2);
}
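The (array, length) overload that the wrapper above delegates to is not listed either; the logistic-loss code calls it the same way (logsumexp(x, n)), while the C GMM examples use the reverse (length, array) ordering. A stand-alone sketch of the (array, length) form, using the usual max-shift for numerical stability; it is an assumption, not code from any of the projects here:

#include <cmath>

// log(sum_i exp(x[i])), shifted by the maximum so no exponential can overflow.
double logsumexp(const double* x, int n)
{
	double mx = x[0];
	for (int i = 1; i < n; i++)
		if (x[i] > mx) mx = x[i];
	double s = 0.0;
	for (int i = 0; i < n; i++)
		s += std::exp(x[i] - mx);
	return mx + std::log(s);
}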
Example #18
inline double logsumexp(const std::vector<double>& a) {
  return logsumexp(a.begin(), a.end());
}
Example #19
inline void normalize_logpdf(std::vector<double>::iterator b, std::vector<double>::iterator e) {
  double log_sum = logsumexp(b, e);
  for (; b != e; ++b) {
    *b = exp(*b - log_sum);
  }
}
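A short usage sketch for the two helpers above, assuming the iterator-based logsumexp from Example #12 and normalize_logpdf are in scope; the input values are purely illustrative:

#include <cstdio>
#include <vector>

int main() {
  std::vector<double> logp = { -1.2, -0.3, -2.5 };   // arbitrary unnormalized log-scores
  normalize_logpdf(logp.begin(), logp.end());        // each entry becomes exp(x - logsumexp)
  for (double p : logp) std::printf("%g\n", p);      // the printed values now sum to 1
  return 0;
}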
Example #20
int logisticloss::logexp(int index, double* v, int n, double lambda, double* x, int MAXITER, double epsilon, int DISPLAY) {
    /* solution to - log exp(x(i))/sum(exp(x(j))) + lambda/2 ||x - v||_2^2 */
    /* n is length of v and x */
    /* writes over x */
    double alpha = 0.1;
    double beta = 0.5;
    int iter, i;
    double t, logsum, p, pu, pptil, decrement;
    double *u = (double *) malloc(n*sizeof(double));
    double *z = (double *) malloc(n*sizeof(double));
    double *grad = (double *) malloc(n*sizeof(double));
    double newobj=0.0, obj=0.0;
    obj = objective(index, x, v, n, lambda);

    for(iter=0;iter<MAXITER;iter++) {
        logsum = logsumexp(x,n);
        if(DISPLAY)
            printf("iter=%d, obj=%f\n", iter, obj);
        pu = 0.0;
        pptil = 0.0;
        for(i=0;i<n;i++) {
            p = exp(x[i] - logsum);
            grad[i] = p + lambda*(x[i] - v[i]);
            if(i==index)
                grad[i] += -1.0;
            u[i] = grad[i]/(p+lambda);
            pu += p*u[i];
            z[i] = p/(p+lambda);
            pptil += z[i]*p;
        }
        pptil = 1 - pptil;
        decrement = 0.0;
        for(i=0;i<n;i++) {
            u[i] -= (pu/pptil)*z[i];
            decrement += grad[i]*u[i];
        }
        if (decrement < 2*epsilon) {
        	// std::cout << "decrement =  " << decrement << std::endl;
            free(u);
            free(z);
            free(grad);
            return 0;
        }
        t = 1.0;
        while(1) {
            for(i=0;i<n;i++)
                z[i] = x[i] - t*u[i];
            newobj = objective(index, z, v, n, lambda);
            if (newobj <= obj + alpha*t*decrement)
                break;
            t = beta*t;
        }
        for(i=0;i<n;i++)
            x[i] = z[i];
        obj = newobj;
    }
    free(u);
    free(z);
    free(grad);
    return 1;
}
Example #21
double logisticloss::objective(int index, double* x, double* v, int n, double lambda) {
        double nrmsqr = normsquare(x,v,n);
        double obj = -x[index] + logsumexp(x, n) + 0.5*lambda*nrmsqr;
        return obj;
}
Example #22
void CapR::CalcOutsideVariable(){
  //Beta_outer
  for(int i = _seq_length-1;i >= 0;i--){
    double temp = _Beta_outer[i+1];
    for(int p = i+1; p <= min(i+_maximal_span+1,_seq_length);p++){
      if(_Alpha_stem[i][p-i] != -INF){
	int type = BP_pair[_int_sequence[i+1]][_int_sequence[p]];
	double bo = _Alpha_stem[i][p-i] + CalcDangleEnergy(type,i,p);
	temp = logsumexp(temp,bo+_Beta_outer[p]);
      }
    }
    _Beta_outer[i] = temp;	
  }
  
  for (int q=_seq_length; q>=TURN+1; q--) {
    for (int p=max(0,q-_maximal_span-1); p<= q-TURN; p++) {
      int type = 0;
      int type2 = 0;

      double temp = 0; bool flag = 0;
      if(p != 0 && q != _seq_length){
	//Beta_stemend
	_Beta_stemend[p][q-p] = q-p >= _maximal_span ? -INF : _Beta_stem[p-1][q-p+2];
	
	//Beta_Multi
	flag = 0;
	if(q-p+1 <= _maximal_span+1){
	  if(_Beta_multi[p-1][q-p+1] != -INF){
	    temp = _Beta_multi[p-1][q-p+1] + MLbase;
	    flag = 1;
	  }
	}
	
	type = BP_pair[_int_sequence[p]][_int_sequence[q+1]];
	int tt = rtype[type];
	if(flag == 1){
	  if(_Beta_stemend[p][q-p] != -INF){
	    temp = logsumexp(temp,_Beta_stemend[p][q-p]+MLclosing+MLintern+ dangle3[tt][_int_sequence[p+1]]+dangle5[tt][_int_sequence[q]]);
	  }
	}else{
	  if(_Beta_stemend[p][q-p] != -INF){
	    temp = _Beta_stemend[p][q-p]+MLclosing+MLintern+dangle3[tt][_int_sequence[p+1]]+dangle5[tt][_int_sequence[q]];
	  }else{
	    temp = -INF;
	  }
	}
	_Beta_multi[p][q-p] = temp;
	
	//Beta_Multi1
	temp = 0; flag = 0;
	for(int k = q+1 ; k<= min(_seq_length,p+_maximal_span);k++){
	  if(_Beta_multibif[p][k-p] != -INF && _Alpha_multi2[q][k-q] != -INF){
	    temp = flag == 0 ? _Beta_multibif[p][k-p]+_Alpha_multi2[q][k-q] : logsumexp(temp,_Beta_multibif[p][k-p]+_Alpha_multi2[q][k-q]) ;
	    flag = 1;
	  }
	}
	_Beta_multi1[p][q-p] = flag == 1 ? temp: -INF;
	
	//Beta_Multi2
	temp = 0; flag = 0;
	if(_Beta_multi1[p][q-p] != -INF){
	  temp = _Beta_multi1[p][q-p];
	  flag = 1;
	}
	if(q-p <= _maximal_span){
	  if(_Beta_multi2[p][q-p+1] != -INF){
	    temp = flag == 1 ? logsumexp(temp,_Beta_multi2[p][q-p+1]+MLbase) : _Beta_multi2[p][q-p+1]+MLbase;
	    flag = 1;
	  }
	}
	
	for(int k = max(0,q-_maximal_span); k < p ;k++){
	  if(_Beta_multibif[k][q-k] != -INF && _Alpha_multi1[k][p-k] != -INF){
	    temp = flag == 0 ? _Beta_multibif[k][q-k]+_Alpha_multi1[k][p-k] : logsumexp(temp,_Beta_multibif[k][q-k]+_Alpha_multi1[k][p-k]);
	    flag = 1;
	  }
	}
	_Beta_multi2[p][q-p] = flag == 0 ? -INF : temp;
	
	//Beta_multibif
	if(_Beta_multi1[p][q-p] != -INF && _Beta_multi[p][q-p] != -INF){
	  _Beta_multibif[p][q-p] = logsumexp(_Beta_multi1[p][q-p],_Beta_multi[p][q-p]);
	}else if(_Beta_multi[p][q-p] == -INF){
	  _Beta_multibif[p][q-p] = _Beta_multi1[p][q-p];
	}else if(_Beta_multi1[p][q-p] == -INF){
	  _Beta_multibif[p][q-p] = _Beta_multi[p][q-p];
	}else{
	  _Beta_multibif[p][q-p] = -INF;
	}
	
      }
      
      //Beta_stem
      type2 = BP_pair[_int_sequence[p+1]][_int_sequence[q]];
      if(type2 != 0){
	temp = _Alpha_outer[p]+_Beta_outer[q]+CalcDangleEnergy(type2,p,q);
	
	type2 = rtype[type2];
	for (int i=max(1,p-MAXLOOP); i<=p; i++){
	  for (int j=q; j<=min(q+ MAXLOOP -p+i,_seq_length-1); j++) {
	    type = BP_pair[_int_sequence[i]][_int_sequence[j+1]];
	    if (type != 0 && !(i == p && j == q)) {
	      if(j-i <= _maximal_span+1 && _Beta_stemend[i][j-i] != -INF){
		temp = logsumexp(temp,_Beta_stemend[i][j-i]+LoopEnergy(type,type2,i,j+1,p+1,q));
	      }
	    }
	  }
	}
	
	if(p != 0 && q != _seq_length){
	  type = BP_pair[_int_sequence[p]][_int_sequence[q+1]];
	  if(type != 0){
	    if(q-p+2 <= _maximal_span+1 && _Beta_stem[p-1][q-p+2] != -INF){
	      temp = logsumexp(temp,_Beta_stem[p-1][q-p+2]+LoopEnergy(type,type2,p,q+1,p+1,q));
	    }
	  }
	}
	_Beta_stem[p][q-p] = temp;
	
	if(_Beta_multi2[p][q-p] != -INF){
	  type2 = rtype[type2];
	  temp = _Beta_multi2[p][q-p] + MLintern + CalcDangleEnergy(type2,p,q);
	  _Beta_stem[p][q-p] = logsumexp(temp,_Beta_stem[p][q-p]);
	}
      }else{
	_Beta_stem[p][q-p] = -INF;
      }
    }
  }
}
Example #23
void CapR::CalcInsideVariable(){
  for (int j =TURN+1; j <= _seq_length; j++){
    for (int i=j-TURN; i >= max(0,j-_maximal_span-1); i--){
      //Alpha_stem
      int type = BP_pair[_int_sequence[i+1]][_int_sequence[j]];
      int type2 = BP_pair[_int_sequence[i+2]][_int_sequence[j-1]];
      
      double temp = 0; bool flag = 0;
      if (type != 0) {
	type2 = rtype[type2];
	if(_Alpha_stem[i+1][j-i-2] != -INF){
	  //Stem -> Stem
	  if(type2 != 0){
	    temp = _Alpha_stem[i+1][j-i-2]+ LoopEnergy(type, type2,i+1,j,i+2,j-1);
	  }
	  flag = 1;
	}
    
	if(_Alpha_stemend[i+1][j-i-2] != -INF){
	  //Stem -> StemEnd
	  temp = flag == 1 ? logsumexp(temp,_Alpha_stemend[i+1][j-i-2]) : _Alpha_stemend[i+1][j-i-2];
	  flag = 1;
	}

	_Alpha_stem[i][j-i] = flag == 0 ? -INF : temp;
      }else{
	_Alpha_stem[i][j-i] = -INF;
      }
      
      //Alpha_multiBif
      temp = 0; flag = 0;
      for (int k=i+1; k<=j-1; k++){
	if(_Alpha_multi1[i][k-i] != -INF && _Alpha_multi2[k][j-k] != -INF){
	  temp = flag == 0 ? _Alpha_multi1[i][k-i]+_Alpha_multi2[k][j-k] : logsumexp(temp,_Alpha_multi1[i][k-i]+_Alpha_multi2[k][j-k]);
	  flag = 1;
	}
      }
      _Alpha_multibif[i][j-i] = flag == 0 ? -INF : temp;
      
      //Alpha_multi2
      temp = 0; flag = 0; 
      if (type != 0) {
	if(_Alpha_stem[i][j-i] != -INF){
	  temp = _Alpha_stem[i][j-i]+MLintern+CalcDangleEnergy(type,i,j);
	  flag = 1;
	}
      }
      if(_Alpha_multi2[i][j-i-1] != -INF){
	_Alpha_multi2[i][j-i] = _Alpha_multi2[i][j-i-1]+MLbase;
	if(flag == 1){
	  _Alpha_multi2[i][j-i] = logsumexp(temp,_Alpha_multi2[i][j-i]);
	}
      }else{
	_Alpha_multi2[i][j-i] = flag == 0 ? -INF : temp;
      }
      
      //Alpha_multi1
      if(_Alpha_multi2[i][j-i] != -INF && _Alpha_multibif[i][j-i] != -INF){
	_Alpha_multi1[i][j-i] = logsumexp(_Alpha_multi2[i][j-i],_Alpha_multibif[i][j-i]);
      }else if(_Alpha_multi2[i][j-i] == -INF){
	_Alpha_multi1[i][j-i] = _Alpha_multibif[i][j-i];
      }else if(_Alpha_multibif[i][j-i] == -INF){
	_Alpha_multi1[i][j-i] = _Alpha_multi2[i][j-i];
      }else{
	_Alpha_multi1[i][j-i] = -INF;
      }
      
      //Alpha_multi
      flag = 0;
      if(_Alpha_multi[i+1][j-i-1] != -INF){
	_Alpha_multi[i][j-i] = _Alpha_multi[i+1][j-i-1]+MLbase;
	flag = 1;
      }
      
      if(flag == 1){
	if(_Alpha_multibif[i][j-i] != -INF){
	  _Alpha_multi[i][j-i] = logsumexp(_Alpha_multi[i][j-i],_Alpha_multibif[i][j-i]);
	}
      }else{
	_Alpha_multi[i][j-i] = _Alpha_multibif[i][j-i];
      }
      
      //Alpha_stemend
      if(j != _seq_length){
	temp = 0;
	type = BP_pair[_int_sequence[i]][_int_sequence[j+1]];
	if (type!=0) {
	  //StemEnd -> sn
	  temp = HairpinEnergy(type, i,j+1);
	  
	  //StemEnd -> sm_Stem_sn
	  for (int p =i; p <= min(i+MAXLOOP,j-TURN-2); p++) {
	    int u1 = p-i;
	    for (int q=max(p+TURN+2,j-MAXLOOP+u1); q<=j; q++) {
	      type2 = BP_pair[_int_sequence[p+1]][_int_sequence[q]];
	      if(_Alpha_stem[p][q-p] != -INF){
		if (type2 != 0 && !(p == i && q == j)) {
		  type2 = rtype[type2];
		  temp = logsumexp(temp,_Alpha_stem[p][q-p]+LoopEnergy(type, type2,i,j+1,p+1,q)); 
		}
	      }
	    }
	  }
	  
	  //StemEnd -> Multi
	  int tt = rtype[type];
	  temp = logsumexp(temp,_Alpha_multi[i][j-i]+MLclosing+MLintern+dangle3[tt][_int_sequence[i+1]]+dangle5[tt][_int_sequence[j]]);
	  _Alpha_stemend[i][j-i] = temp;
	}else{
	  _Alpha_stemend[i][j-i] = -INF;
	}
      }
    }
  }
  
  //Alpha_Outer
  for(int i = 1;i <= _seq_length;i++){
    double temp = _Alpha_outer[i-1];
    for(int p = max(0,i-_maximal_span-1); p <i;p++){
      if(_Alpha_stem[p][i-p] != -INF){
	int type = BP_pair[_int_sequence[p+1]][_int_sequence[i]];
	double ao = _Alpha_stem[p][i-p]+CalcDangleEnergy(type,p,i);
	temp = logsumexp(temp,ao+_Alpha_outer[p]);
      }
    }
    _Alpha_outer[i] = temp;
  }
}
Example #24
  /************************************************
   * Build CG of a given doc with a latent sequence
   *
   * doc: 
   * cg: computation graph
   * latseq: latent sequence from decoding
   * obsseq: latent sequence from observation
   * flag: what we expect to get from this function
   *       "PROB": compute the probability of the last sentence 
   *               given the latent value
   *       "ERROR": compute the prediction error of entire doc
   *       "INFER": compute prediction error on words with 
   *                inferred latent variables
   ************************************************/
  Expression BuildRelaGraph(const Doc& doc, ComputationGraph& cg,
			    LatentSeq latseq, LatentSeq obsseq){
    builder.new_graph(cg);
    // define expression
    Expression i_R = parameter(cg, p_R);
    Expression i_bias = parameter(cg, p_bias);
    Expression i_context = parameter(cg, p_context);
    Expression i_L = parameter(cg, p_L);
    Expression i_lbias = parameter(cg, p_lbias);
    vector<Expression> negloglik, neglogprob;
    // -----------------------------------------
    // check hidden variable list
    assert(latseq.size() <= doc.size());
    // -----------------------------------------
    // iterate over latent sequences
    // get LV-related transformation matrix
    Expression i_h_t;
    vector<Expression> obj;
    for (unsigned k = 0; k < doc.size(); k++){
      auto& sent = doc[k];
      // start a new sequence for each sentence
      Expression cvec;
      if (k == 0){
	cvec = i_context;
      } else {
	cvec = input(cg, {(unsigned)final_h.size()}, final_h);
      }
      // two parts of the objective function
      Expression sent_objpart1;
      vector<Expression> sent_objpart2;
      for (int latval = 0; latval < nlatvar; latval ++){
	builder.start_new_sequence();
	// latent variable distribution
	vector<Expression> l_negloglik;
	Expression l_neglogprob = pickneglogsoftmax((i_L * cvec) + i_lbias, latval); 
	// build RNN for the current sentence
	Expression i_x_t, i_h_t, i_y_t, i_negloglik;
	Expression i_Tk = lookup(cg, p_T, latval);
	// for each word
	unsigned slen = sent.size() - 1;
	for (unsigned t = 0; t < slen; t++){
	  // get word representation
	  i_x_t = const_lookup(cg, p_W, sent[t]);
	  vector<Expression> vecexp;
	  vecexp.push_back(i_x_t);
	  vecexp.push_back(cvec);
	  i_x_t = concatenate(vecexp);
	  // compute hidden state
	  i_h_t = builder.add_input(i_Tk * i_x_t);
	  // compute prediction
	  i_y_t = (i_R * i_h_t) + i_bias;
	  // get prediction error
	  i_negloglik = pickneglogsoftmax(i_y_t, sent[t+1]);
	  // add back
	  l_negloglik.push_back(i_negloglik);
	}
	// update context vector
	if (latval == (nlatvar - 1)){
	  final_h.clear();
	  final_h = as_vector(i_h_t.value());
	}
	// - log P(Y, Z) given Y and a specific Z value
	Expression pxz = sum(l_negloglik) + l_neglogprob;
	sent_objpart2.push_back(pxz * (-1.0));
	if (obsseq[k] == latval){
	  sent_objpart1 = pxz * (-1.0);
	}
      }
      // if the latent variable is observed
      if (obsseq[k] >= 0){
	Expression sent_obj = logsumexp(sent_objpart2) - sent_objpart1;
	obj.push_back(sent_obj);
	// cout << as_scalar(sent_obj.value()) << endl;
      }
    }
    // get the objective for the entire doc
    if (obj.size() > 0){
      // if at least one observed latent value
      return sum(obj);
    } else {
      // otherwise
      Expression zero = input(cg, 0.0);
      return zero;
    }
  }
Example #25
//return z
static REAL_SCORES calc_inside(const int length, REAL_SCORES *beta,OneScores<REAL_SCORES>& probs)
{
	int key, key1, key2;

	for(int i = 0; i < length; i++){
		key = getKey(i, i, 0, 1, length);
		beta[key] = 0.0;
		key = getKey(i, i, 1, 1, length);
		beta[key] = 0.0;
	}

	for(int j = 1; j < length; j++){
		for(int s = 0; s + j < length; s++){
			int t = s + j;
			//double prodProb_st = probs[s][t][0];
			//double prodProb_ts = probs[s][t][1];

			//init beta
			//incomplete spans
			//r == s
			int key_st_0 = getKey(s, t, 0, 0, length);
			//double prodProb_sst = probs_trips[s][s][t] + probs_sibs[s][t][0] + prodProb_st;
			REAL_SCORES prodProb_sst = probs[get_index2_o2sib(length,s,s,t)];
			key1 = getKey(s, s, 0, 1, length);
			key2 = getKey(s + 1, t, 1, 1, length);
			beta[key_st_0] = logsumexp(beta[key_st_0], beta[key1] + beta[key2] + prodProb_sst, true);

			//r == t
			int key_ts_0 = getKey(s, t, 1, 0, length);
			//double prodProb_tts = probs_trips[t][t][s] + probs_sibs[t][s][0] + prodProb_ts;
			REAL_SCORES prodProb_tts = probs[get_index2_o2sib(length,t,t,s)];
			key1 = getKey(s, t - 1, 0, 1, length);
			key2 = getKey(t, t, 1, 1, length);
			beta[key_ts_0] = logsumexp(beta[key_ts_0], beta[key1] + beta[key2] + prodProb_tts, true);

			//sibling spans
			int key_st_2 = getKey(s, t, 0, 2, length);
			beta[key_st_2] = 0.0;
			int key_ts_2 = getKey(s, t, 1, 2, length);
			beta[key_ts_2] = 0.0;
			bool flg_st_2 = true, flg_ts_2 = true;

			//complete spans
			int key_st_1 = getKey(s, t, 0, 1, length);
			beta[key_st_1] = 0.0;
			int key_ts_1 = getKey(s, t, 1, 1, length);
			beta[key_ts_1] = 0.0;
			bool flg_st_1 = true, flg_ts_1 = true;

			//calc sibling spans
			for(int r = s; r < t; r++){
				key1 = getKey(s, r, 0 ,1, length);
				key2 = getKey(r + 1, t, 1, 1, length);

				beta[key_st_2] = logsumexp(beta[key_st_2], beta[key1] + beta[key2], flg_st_2);
				flg_st_2 = false;

				beta[key_ts_2] = logsumexp(beta[key_ts_2], beta[key1] + beta[key2], flg_ts_2);
				flg_ts_2 = false;
			}

			//calc incomplete spans
			for(int r = s + 1; r < t; r++){
				key1 = getKey(s, r, 0, 0, length);
				key2 = getKey(r, t, 0, 2, length);
				//double prodProb_srt = probs_trips[s][r][t] + probs_sibs[r][t][1] + prodProb_st;
				REAL_SCORES prodProb_srt = probs[get_index2_o2sib(length,s,r,t)];
				beta[key_st_0] = logsumexp(beta[key_st_0], beta[key1] + beta[key2] + prodProb_srt, false);

				key1 = getKey(s, r, 1, 2, length);
				key2 = getKey(r, t, 1, 0, length);
				//double prodProb_trs = probs_trips[t][r][s] + probs_sibs[r][s][1] + prodProb_ts;
				REAL_SCORES prodProb_trs = probs[get_index2_o2sib(length,t,r,s)];
				beta[key_ts_0] = logsumexp(beta[key_ts_0], beta[key1] + beta[key2] + prodProb_trs, false);
			}

			//calc complete spans
			for(int r = s; r <= t; r++){
				if(r != s){
					key1 = getKey(s, r, 0, 0, length);
					key2 = getKey(r, t, 0, 1, length);
					beta[key_st_1] = logsumexp(beta[key_st_1], beta[key1] + beta[key2], flg_st_1);
					flg_st_1 = false;
				}
				if(r != t){
					key1 = getKey(s, r, 1, 1, length);
					key2 = getKey(r, t, 1, 0, length);
					beta[key_ts_1] = logsumexp(beta[key_ts_1], beta[key1] + beta[key2], flg_ts_1);
					flg_ts_1 = false;
				}
			}
		}
	}

	key1 = getKey(0, length - 1, 0, 1, length);
	key2 = getKey(0, length - 1, 1, 1, length);
	return logsumexp(beta[key1], beta[key2], false);
}
Example #26
static void calc_outside(const int length,const REAL_SCORES *beta,OneScores<REAL_SCORES>& probs,REAL_SCORES *alpha)
{
	int key;
	int end = length - 1;
	for(int d = 0; d < 2; d++){
		for(int c = 0 ; c < 3; c++){
			key = getKey(0, end, d, c, length);
			alpha[key] = 0.0;
		}
	}

	for(int j = end; j >= 1; j--){
		for(int s = 0; s + j < length; s++){
			int t = s + j;

			int key_a, key_b;

			//init alpha
			//sibling spans
			int key_st_2 = getKey(s, t, 0, 2, length);
			alpha[key_st_2] = 0.0;
			bool flg_st_2 = true;
			for(int r = 0; r < s; r++){
				//double prodProb_rst = probs_trips[r][s][t] + probs_sibs[s][t][1] + probs[r][t][0];
				REAL_SCORES prodProb_rst = probs[get_index2_o2sib(length,r,s,t)];
				key_b = getKey(r, s, 0, 0, length);
				key_a = getKey(r, t, 0, 0, length);
				alpha[key_st_2] = logsumexp(alpha[key_st_2], beta[key_b] + alpha[key_a] + prodProb_rst, flg_st_2);
				flg_st_2 = false;
			}
			for(int r = t + 1; r < length; r++){
				//double prodProb_rts = probs_trips[r][t][s] + probs_sibs[t][s][1] + probs[s][r][1];
				REAL_SCORES prodProb_rts = probs[get_index2_o2sib(length,r,t,s)];
				key_b = getKey(t, r, 1, 0, length);
				key_a = getKey(s, r, 1, 0, length);
				alpha[key_st_2] = logsumexp(alpha[key_st_2], beta[key_b] + alpha[key_a] + prodProb_rts, flg_st_2);
				flg_st_2 = false;
			}

			//complete spans
			int key_st_1 = getKey(s, t, 0, 1, length);
			bool flg_st_1 = true;
			alpha[key_st_1] = 0.0;
			if(t + 1 < length){
				key_a = getKey(s, t + 1, 1, 0, length);
				//double prodProb = probs_trips[t + 1][t + 1][s] + probs_sibs[t + 1][s][0] + probs[s][t + 1][1];
				REAL_SCORES prodProb = probs[get_index2_o2sib(length,t+1,t+1,s)];
				alpha[key_st_1] = logsumexp(alpha[key_st_1], alpha[key_a] + prodProb, flg_st_1);
				flg_st_1 = false;
			}

			int key_ts_1 = getKey(s, t, 1, 1, length);
			bool flg_ts_1 = true;
			alpha[key_ts_1] = 0.0;
			if(s != 0){
				key_a = getKey(s - 1, t, 0, 0, length);
				//double prodProb = probs_trips[s - 1][s - 1][t] + probs_sibs[s - 1][t][0] + probs[s - 1][t][0];
				REAL_SCORES prodProb = probs[get_index2_o2sib(length,s-1,s-1,t)];
				alpha[key_ts_1] = logsumexp(alpha[key_ts_1], alpha[key_a] + prodProb, flg_ts_1);
				flg_ts_1 = false;
			}

			for(int r = 0; r < s; r++){
				key_b = getKey(r, s, 0, 0, length);
				key_a = getKey(r, t, 0, 1, length);
				alpha[key_st_1] = logsumexp(alpha[key_st_1], beta[key_b] + alpha[key_a], flg_st_1);
				flg_st_1 = false;

				if(!((r == 0) && (t == length -1))){
					key_b = getKey(r, s - 1, 0 ,1, length);
					key_a = getKey(r, t, 0, 2, length);
					alpha[key_ts_1] = logsumexp(alpha[key_ts_1], beta[key_b] + alpha[key_a], flg_ts_1);
					flg_ts_1 = false;
				}
			}
			for(int r = t + 1; r < length; r++){
				if(!((s == 0) && (r == length -1))){
					key_b = getKey(t + 1, r, 1, 1, length);
					key_a = getKey(s, r, 0 ,2, length);
					alpha[key_st_1] = logsumexp(alpha[key_st_1], beta[key_b] + alpha[key_a], flg_st_1);
					flg_st_1 = false;
				}

				key_b = getKey(t, r, 1, 0, length);
				key_a = getKey(s, r, 1, 1, length);
				alpha[key_ts_1] = logsumexp(alpha[key_ts_1], beta[key_b] + alpha[key_a], flg_ts_1);
				flg_ts_1 = false;
			}

			//incomplete spans
			int key_st_0 = getKey(s, t, 0, 0, length);
			alpha[key_st_0] = 0.0;
			bool flg_st_0 = true;

			int key_ts_0 = getKey(s, t, 1, 0, length);
			alpha[key_ts_0] = 0.0;
			bool flg_ts_0 = true;

			for(int r = t; r < length; r++){
				key_b = getKey(t, r, 0 ,1, length);
				key_a = getKey(s, r, 0 ,1, length);
				alpha[key_st_0] = logsumexp(alpha[key_st_0], beta[key_b] + alpha[key_a], flg_st_0);
				flg_st_0 = false;

				if(r != t){
					key_b = getKey(t, r, 0, 2, length);
					key_a = getKey(s, r, 0, 0, length);
					//double prodProb_str = probs_trips[s][t][r] + probs_sibs[t][r][1] + probs[s][r][0];
					REAL_SCORES prodProb_str = probs[get_index2_o2sib(length,s,t,r)];
					alpha[key_st_0] = logsumexp(alpha[key_st_0], beta[key_b] + alpha[key_a] + prodProb_str, flg_st_0);
					flg_st_0 = false;
				}
			}

			for(int r = 0; r <= s; r++){
				key_b = getKey(r, s, 1, 1, length);
				key_a = getKey(r, t, 1, 1, length);
				alpha[key_ts_0] = logsumexp(alpha[key_ts_0], beta[key_b] + alpha[key_a], flg_ts_0);
				flg_ts_0 = false;

				if(r != s){
					key_b = getKey(r, s, 0, 2, length);
					key_a = getKey(r, t, 1, 0, length);
					//double prodProb_tsr = probs_trips[t][s][r] + probs_sibs[s][r][1] + probs[r][t][1];
					REAL_SCORES prodProb_tsr = probs[get_index2_o2sib(length,t,s,r)];
					alpha[key_ts_0] = logsumexp(alpha[key_ts_0], beta[key_b] + alpha[key_a] + prodProb_tsr, flg_ts_0);
					flg_ts_0 = false;
				}
			}
		}
	}
}
Example #27
MatrixXf EMclustering::expectation(MatrixXf x, gaussian_model model, double &llh)
{
	//cerr<<"===="<<endl;
	MatrixXf mu(model.mu.rows(),model.mu.cols());
	mu = model.mu;
	// copy the per-cluster covariances so the delete [] below frees our own buffer
	// rather than the model's array
	MatrixXf *sigma = new MatrixXf[clusternum];
	for(int i=0;i<clusternum;i++)
		sigma[i] = model.sigma[i];
	VectorXf w(model.weight.size());
	w = model.weight;

	//cerr<<mu<<endl;
	//cerr<<w<<endl<<endl;
	//for(int i=0;i<clusternum;i++)cerr<<sigma[i]<<endl;
	//cerr<<endl;

	int n = x.cols();
	int k = mu.cols();
	MatrixXf logrho(n,k);
	logrho.setZero(n,k);
	//cerr<<logrho<<endl;
	
//cerr<<logrho<<endl<<endl;
	for(int i=0;i<k;i++)
	{
		//cerr<<i<<endl;
		logrho.col(i) = loggausspdf(x,mu.col(i),sigma[i]);
		//cerr<<mu.col(i)<<endl;
	}

	//cerr<<logrho<<endl<<endl;
	
	w = w.array().log();//cerr<<w<<endl<<endl;
	MatrixXf tmp1(logrho.rows(),logrho.cols());
	tmp1 = logrho.rowwise() + w.adjoint();
	logrho = tmp1;//cerr<<logrho<<endl<<endl;
	VectorXf t(logrho.rows());
	t = logsumexp(logrho,2);//cerr<<t<<endl<<endl;
	llh = t.sum()/n;//cerr<<llh<<endl<<endl;
	MatrixXf logr(logrho.rows(),logrho.cols());
	logr = logrho.colwise() - t;//cerr<<logr<<endl<<endl;
	MatrixXf r(logrho.rows(),logrho.cols());
	r = logr.array().exp();//cerr<<r<<endl<<endl;

	logrho.resize(0,0);
	mu.resize(0,0);
	w.resize(0);
	//for(int i=0;i<clusternum;i++)//..................
	//	sigma[i].resize(0,0);
	delete [] sigma;
	tmp1.resize(0,0);
	t.resize(0);
	logr.resize(0,0);
	
	//cerr<<r<<endl<<endl;
	//cerr<<llh<<endl;

	return r;
}
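The call logsumexp(logrho, 2) above reduces across columns (MATLAB-style dim == 2), returning one log-normalizer per row of logrho. A possible sketch of such a helper using the same Eigen types; the loop-based form and the signature are assumptions, not taken from the original project:

#include <Eigen/Dense>
#include <cmath>
using Eigen::MatrixXf;
using Eigen::VectorXf;

// Row-wise log-sum-exp: t(i) = log(sum_j exp(x(i, j))), stabilized by
// subtracting each row's maximum before exponentiating.
VectorXf logsumexp(const MatrixXf& x, int /*dim; assumed 2 = reduce over columns*/)
{
	VectorXf m = x.rowwise().maxCoeff();   // per-row maximum
	VectorXf t(x.rows());
	for (int i = 0; i < x.rows(); i++)
		t(i) = m(i) + std::log((x.row(i).array() - m(i)).exp().sum());
	return t;
}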
Example #28
void
dm_learn (document *data, double *lambda, double **alpha,
	  int nmixtures, int nlex, int emmax, int remmax, double epsilon)
{
	document *dp;
	double *d0, *f, **p;
	double *s, **mu, **eps, *beta;
	double aimv, z, t1, t2;
	double ppl, sppl, pplp = 0, spplp = 0;
	int i, j, m, n, t, v;
	int start, elapsed, step, steps = 0;

	/* initialize seed */
	srand(time(NULL));

	/* initialize lambda */
	for (i = 0; i < nmixtures; i++)
		lambda[i] = 1.0 / (double)nmixtures;

	/* count data length, allocate p */
	for (dp = data, n = 0; (dp->len) != -1; dp++, n++)
		;
	if ((p = dmatrix(n, nmixtures)) == NULL) {
		fprintf(stderr, "dm_learn:: can't allocate p.\n");
		return;
	}
	/* allocate d0, and cache */
	if ((d0 = (double *)calloc(n, sizeof(double))) == NULL) {
		fprintf(stderr, "dm_learn:: can't allocate d0.\n");
		return;
	}
	for (dp = data, i = 0; (dp->len) != -1; dp++, i++)
	{
		for (j = 0, z = 0; j < dp->len; j++)
			z += dp->cnt[j];
		d0[i] = z;
	}

	/* allocate eps */
	if ((eps = dmatrix(nmixtures,nlex)) == NULL) {
		fprintf(stderr, "dm_learn:: can't allocate eps.\n");
		return;
	}
	/* allocate beta, and initialize */
	if ((beta = (double *)calloc(nlex, sizeof(double))) == NULL) {
		fprintf(stderr, "dm_learn:: can't allocate beta.\n");
		return;
	}
	for (v = 0; v < nlex; v++)
		beta[v] = INITIAL_PCOUNT;

	/* allocate s, mu */
	if ((s = (double *)calloc(nmixtures, sizeof(double))) == NULL) {
		fprintf(stderr, "dm_learn:: can't allocate s.\n");
		return;
	}
	if ((mu = dmatrix(nmixtures, nlex)) == NULL) {
		fprintf(stderr, "dm_learn:: can't allocate mu.\n");
		return;
	}
	/* initialize s, mu */
	for (m = 0; m < nmixtures; m++)
		s[m] = INITIAL_PCOUNT * nlex;
	if ((f = (double *)calloc(nlex, sizeof(double))) == NULL) {
		fprintf(stderr, "dm_learn:: can't allocate f.\n");
		return;
	}
	for (dp = data, z = 0; (dp->len) != -1; dp++)
	{
		for (j = 0; j < dp->len; j++)
		{
			f[dp->id[j]] += dp->cnt[j];
			z += dp->cnt[j];
		}
	}
	for (i = 0; i < nlex; i++)
		f[i] /= z;
	for (m = 0; m < nmixtures; m++)
		dirrand(mu[m], f, nlex, INITIAL_PCOUNT * nlex * 100);
	for (m = 0; m < nmixtures; m++) {
		for (v = 0; v < nlex; v++)
			mu[m][v] += INITIAL_PCOUNT;
		for (v = 0; v < nlex; v++)
			mu[m][v] /= (1 + INITIAL_PCOUNT * nlex);
	}
	
	printf("number of documents   = %d\n", n);
	printf("number of words       = %d\n", nlex);
	printf("number of mixtures    = %d\n", nmixtures);
	printf("convergence criterion = %.6g %%\n", epsilon * 100);

	/*
	 *  learn main
	 *
	 */
	start = myclock();
	for (t = 0; t < emmax; t++)
	{
		/*
		 *  E step
		 *
		 */
		for (step = 1; step <= remmax; step++)
		{
			/* inner REM E step */
			printf("iteration %d/%d [REM %d+%d]..\t",
			       t + 1, emmax, step, steps);
			fflush(stdout);
			for (dp = data, i = 0; (dp->len) != -1; dp++, i++)
			{
				for (m = 0; m < nmixtures; m++)
				{
					for (j = 0, z = 0; j < dp->len; j++) {
						if (dp->cnt[j] == 1) {
							z += log(s[m]*mu[m][dp->id[j]]);
						} else {
							z += lgamma(s[m]*mu[m][dp->id[j]] + dp->cnt[j])
								- lgamma(s[m]*mu[m][dp->id[j]]);
						}
					}
					p[i][m] = log(lambda[m])
						  + lgamma(s[m]) - lgamma(s[m] + d0[i])
						  + z;
				}
				/* normalize, and exp */
				for (m = 0, z = 0; m < nmixtures; m++)
					z = logsumexp(z, p[i][m], (m == 0));
				for (m = 0; m < nmixtures; m++)
					p[i][m] = exp(p[i][m] - z);
			}
			/* inner REM M step */
			for (m = 0; m < nmixtures; m++)
			{
				for (v = 0; v < nlex; v++)
					eps[m][v] = beta[v];
				t1 = t2 = 0;
				for (dp = data, i = 0; (dp->len) != -1; dp++, i++)
				{
					for (j = 0, z = 0; j < dp->len; j++)
					{
						v = dp->id[j];
						if (dp->cnt[j] == 1) {
							aimv = 1;
						} else {
							aimv = s[m]*mu[m][v] *
								(psi(s[m]*mu[m][v] + dp->cnt[j])
								 - psi(s[m]*mu[m][v]));
						}
						eps[m][v] += p[i][m] * aimv;
						z += aimv;
					}
					t1 += p[i][m] * z;
					t2 += p[i][m] * (psi(s[m] + d0[i]) - psi(s[m]));
				}
				/* update s */
				s[m] = t1 / t2;
				/* update mu */
				for (v = 0, z = 0; v < nlex; v++)
					z += eps[m][v];
				for (v = 0; v < nlex; v++)
					mu[m][v] = eps[m][v] / z;
			}
			ppl = dm_ppl(data, lambda, s, mu, d0, nmixtures, nlex);
			printf("PPL = %.6g\r", ppl); fflush(stdout);
			if (fabs(pplp - ppl) / pplp < 1.0e-3)
				break;	/* inner loop converged */
			else
				pplp = ppl;
		}
		steps += step;
		
		/*
		 *  M step
		 *
		 */

		/* MLE lambda */
		for (m = 0; m < nmixtures; m++)
			lambda[m] = 0;
		for (dp = data, i = 0; (dp->len) != -1; dp++, i++)
		{
			for (m = 0; m < nmixtures; m++)
				lambda[m] += p[i][m];
		}
		/* normalize */
		for (m = 0, z = 0; m < nmixtures; m++)
			z += lambda[m];
		for (m = 0; m < nmixtures; m++)
			lambda[m] /= z;
		
		/* compute alpha */
		for (m = 0; m < nmixtures; m++)
			for (v = 0; v < nlex; v++)
				alpha[m][v] = s[m] * mu[m][v];

		/* MLE beta */
		newton_beta (beta, eps, nmixtures, nlex, 0);
		
		/* converged? */
		sppl = dm_ppl(data, lambda, s, mu, d0, nmixtures, nlex);
		elapsed = myclock() - start;
		if ((t > 1) && (spplp - sppl) / spplp < epsilon) {
			printf("\nconverged. [%s]\n", rtime(elapsed));
			free_dmatrix(mu, nmixtures);
			free_dmatrix(eps, nmixtures);
			free_dmatrix(p, n);
			free(beta);
			free(d0);
			free(s);
			free(f);
			return;
		}
		spplp = sppl;
		/*
		 *  ETA
		 *
		 */
		printf("iteration %2d/%d [REM %d+%d]..  \t",
		       t + 1, emmax, step, steps);
		printf("PPL = %.6g\t", sppl);
		printf("ETA:%s (%d sec/step)\r",
		       rtime(elapsed * ((double) emmax / (t + 1) - 1)),
		       (int)((double) elapsed / (t + 1) + 0.5));
		
	}
	printf("\nmaximum iteration reached. exiting..\n");
	
	free_dmatrix(mu, nmixtures);
	free_dmatrix(eps, nmixtures);
	free_dmatrix(p, n);
	free(beta);
	free(d0);
	free(s);
	free(f);
	return;

}
Example #29
void train() {
	/* initialize output */
	printf("init train\n");
	initTrain();

	/* initialize temp variables */
	double *myalpha_new = (double *) malloc(sizeof(double)*K);
	double *psi_sum_beta = (double *) malloc(sizeof(double)*M);
	double *psi_myalpha = (double *) malloc(sizeof(double)*K);
	double **log_myrho = (double **) malloc(sizeof(double*)*M);
	double **psi_mybeta = (double **) malloc(sizeof(double*)*M);
	for (int m = 0; m < M; m++) {
		log_myrho[m] = (double *) malloc(sizeof(double)*K);
		psi_mybeta[m] = (double *) malloc(sizeof(double)*K);
	}
	double **old_mytheta = (double **) malloc(sizeof(double*)*K);
	double **log_mytheta = (double **) malloc(sizeof(double*)*K);
	double **log_inv_mytheta = (double **) malloc(sizeof(double*)*K);
	for (int k = 0; k < K; k++) {
		old_mytheta[k] = (double *) malloc(sizeof(double)*L);
		for (int l = 0; l < L; l++) old_mytheta[k][l] = 0;
		log_mytheta[k] = (double *) malloc(sizeof(double)*L);
		log_inv_mytheta[k] = (double *) malloc(sizeof(double)*L);
	}
	double *g = (double *) malloc(sizeof(double)*K);
	double *q = (double *) malloc(sizeof(double)*K);

	double maxDiff = 0;

	for (int out_iter = 0; out_iter < OUT_LOOP; out_iter++) {
		if (out_iter % 100 == 0) printf("Iter: %d\n", out_iter);
		for (int k = 0; k < K; k++) {
			for (int l = 0; l < L; l++) {
#ifdef NEW_PRIOR
				if (A[l] == 0) continue;
#endif
				log_mytheta[k][l] = log(mytheta[k][l]);
				//printf("%lf ", log(mytheta[k][l]));
				log_inv_mytheta[k][l] = log(1-mytheta[k][l]);
			}
			//printf("\n");
		}
		/* e-step */
//		for (int in_iter = 0; in_iter < IN_LOOP; in_iter++) {
			//printf("in iter: %d\n", in_iter);
#pragma omp parallel shared(M,N,K,L,mybeta,psi_mybeta,log_myrho,log_mytheta,log_inv_mytheta,r)
			{
#pragma omp for schedule(dynamic,1)
				for (int m = 0; m < M; m++) {
					/* computer r */
					double sum_beta = 0;
					for (int k = 0; k < K; k++) {
						sum_beta += mybeta[m][k];
						psi_mybeta[m][k] = DiGamma_Function(mybeta[m][k]);
					}
					psi_sum_beta[m] = DiGamma_Function(sum_beta);
					for (int n = 0; n < N[m]; n++) {
						for (int k = 0; k < K; k++) {
							log_myrho[m][k] = psi_mybeta[m][k]-psi_sum_beta[m];
							for (int l = 0; l < L; l++) {
#ifdef NEW_PRIOR
								if (A[l] == 0) continue;
#endif

								if (R[m][n][l]) {
									log_myrho[m][k] += log_mytheta[k][l];
								} else {
									log_myrho[m][k] += log_inv_mytheta[k][l];
								}
							}
						}
						double log_sum_rho = logsumexp(log_myrho[m], K);
						for (int k = 0; k < K; k++) {
							r[m][n][k] = exp(log_myrho[m][k] - log_sum_rho);
						}
					}

					/* compute mybeta */
					for (int k = 0; k < K; k++) {
						mybeta[m][k] = myalpha[k];
						for (int n = 0; n < N[m]; n++) {
							mybeta[m][k] = mybeta[m][k] + r[m][n][k];
						}
					}
				}
			}
			/*
			printf("beta:\n");
			for (int m = 0; m < M; m++) {
				for (int k = 0; k < K; k++) {
					printf("%lf ", mybeta[m][k]);
				}
				printf("\n");
			}
			*/
//		}
#ifdef DEBUG
		printf("beta:\n");
		for (int m = 0; m < M; m++) {
			for (int k = 0; k < K; k++) {
				printf("%lf ", mybeta[m][k]);
			}
			printf("\n");
		}
#endif
		/* m-step */
		if (out_iter != OUT_LOOP - 1) {
			/* update alpha */
			if (mode == UPDATE_ALPHA) {
				for (int m = 0; m < M; m++) {
					double sum_beta = 0;
					for (int k = 0; k < K; k++) {
						sum_beta += mybeta[m][k];
						psi_mybeta[m][k] = DiGamma_Function(mybeta[m][k]);
					}
					psi_sum_beta[m] = DiGamma_Function(sum_beta);
				}
				int converge = 0;
				for (int iter = 0; iter < 1000; iter++) {
					double sum_alpha = 0;
					for (int k = 0; k < K; k++) {
						sum_alpha += myalpha[k];
						psi_myalpha[k] = DiGamma_Function(myalpha[k]);
					}
					double psi_sum_alpha = DiGamma_Function(sum_alpha);
					int fault;
					for (int k = 0; k < K; k++) {
						g[k] = M * (psi_sum_alpha - psi_myalpha[k]);
						for (int m = 0; m < M; m++) {
							g[k] += psi_mybeta[m][k] - psi_sum_beta[m];
						}
						q[k] = -M * trigamma(myalpha[k], &fault);
					}
					double z = M * trigamma(sum_alpha, &fault);
					double gq = 0;
					double rq = 0;
					for (int k = 0; k < K; k++) {
						gq = gq + g[k] / q[k];
						rq = rq + 1 / q[k];
					}
					double b = gq / (1 / z + rq);
					for (int k = 0; k < K; k++) {
						myalpha_new[k] = myalpha[k] - (g[k] - b) / q[k];
						if (myalpha_new[k] < 0) {
							printf("warning alpha small than zero\n");
						}
					}
#ifdef DEBUG
					printf("alpha:\n");
					for (int k = 0; k < K; k++) {
						printf("%lf ", myalpha[k]);
					}
					printf("\n");
#endif

					converge = 1; 
					for (int k = 0; k < K; k++) {
						double diff = myalpha_new[k] - myalpha[k];
						if (diff > 1e-6 || diff < -1e-6) {
							converge = 0;
							break;
						}
					}
					if (converge) {
						break;
					}

					double *tmpalpha = myalpha;
					myalpha = myalpha_new;
					myalpha_new = tmpalpha;
				}
				if (!converge) {
					printf("warning: not converge\n");
				}
			}

			/* update theta */
#pragma omp parallel shared(K,N,L,M,mytheta,r,R)
			{
#pragma omp for schedule(dynamic,1)
				for (int k = 0; k < K; k++) {
					for (int l = 0; l < L; l++) {
						double rR = 0;
						double sum_r = 0;
#ifdef PRIOR
						rR += A;
						sum_r += A + B;
#endif

#ifdef NEW_PRIOR
						if (A[l] == 0) continue;
						rR += A[l];
						sum_r += A[l] + B[l];
#endif

						for (int m = 0; m < M; m++) {
							for (int n = 0; n < N[m]; n++) {
								rR += r[m][n][k]*R[m][n][l];
								sum_r += r[m][n][k];
							}
						}
						mytheta[k][l] = rR / sum_r;
						if (EQUAL(rR,0.0)) {
							mytheta[k][l] = 0;
						}
						if (mytheta[k][l] < 0 || mytheta[k][l] > 1 || mytheta[k][l] != mytheta[k][l]) {
							printf("error %lf %lf\n", rR, sum_r);
						}
					}
				}
			}

			maxDiff = 0;
			for (int k = 0; k < K; k++ ){
				for (int l = 0; l < L; l++) {
#ifdef NEW_PRIOR
					if (A[l] == 0) continue;
#endif
					double diff = old_mytheta[k][l] - mytheta[k][l];
					if (diff > maxDiff) maxDiff = diff;
					if	(-diff > maxDiff) maxDiff = -diff;
					old_mytheta[k][l] = mytheta[k][l];
				}
			}
			if (maxDiff < 1e-6) {
				printf("Finished.\n");
				break;
			}

#ifdef DEBUG
			printf("theta:\n");
			for (int k = 0; k < K; k++) {
				for (int l = 0; l < L; l++) {
					printf("%lf ", mytheta[k][l]);
				}
				printf("\n");
			}
#endif
		}
	}

	/* free temp variables */
	free(g);
	free(q);
	for (int k = 0; k < K; k++) {
		free(log_inv_mytheta[k]);
		free(log_mytheta[k]);
		free(old_mytheta[k]);
	}
	free(old_mytheta);
	free(log_inv_mytheta);
	free(log_mytheta);
	for (int m = 0; m < M; m++) {
		free(psi_mybeta[m]);
		free(log_myrho[m]);
	}
	free(psi_mybeta);
	free(log_myrho);
	free(psi_sum_beta);
	free(psi_myalpha);
	free(myalpha_new);
}