Пример #1
0
/**
 * Trains the model `m` with an EM loop over the member corpus `cor`.
 *
 * Setup: the model is initialised from the corpus term count, the sufficient
 * statistics are allocated, one M-step is run on them, and then m->alpha is
 * overwritten with initial_alpha_.
 *
 * Each of the em_max_iter_ rounds clears the statistics, runs EStep and
 * MStep, calls LearningEta to update m->eta, and logs the AUC reported by
 * PredictAUC for the current model.
 *
 * @param m model to train; modified in place.
 */
void VarRTM::RunEM(RTM* m) {
  m->Init(cor.TermNum());

  RTMSuffStats stats;
  stats.InitSS(m->TopicNum(), m->TermNum());
  MStep(stats, m);
  // Set after the first M-step, exactly as the training schedule requires.
  m->alpha = initial_alpha_;

  int round = 0;
  while (round < em_max_iter_) {
    // Start every round from empty sufficient statistics.
    stats.SetZero(m->TopicNum(), m->TermNum());
    EStep(cor, *m, &stats);
    MStep(stats, m);

    // NOTE(review): alpha is only sized here, never filled in this function;
    // confirm what LearningEta expects from it.
    Vec alpha(m->TopicNum());
    LearningEta(alpha, stats.z_bar, &(m->eta));

    LOG(INFO) << round << " AUC:" << PredictAUC(*m, stats.z_bar);
    ++round;
  }
}
Пример #2
0
/**
 * Runs the EM training loop for `m` over the member corpus cor_.
 *
 * Before the loop the sufficient statistics are initialised from the corpus
 * and one M-step is applied. Each of the converged_.em_max_iter_ iterations
 * then:
 *   1. calls Infer on every document (OpenMP parallel loop) to fill the
 *      per-document variational variables and likelihoods;
 *   2. accumulates the sufficient statistics serially from those variables;
 *   3. runs MStep, logs m->pi and writes the per-document eta strings via
 *      OutputFile.
 *
 * @param test not used by the active code; only the commented-out perplexity
 *             log at the end of the loop refers to it.
 * @param m    model to train; modified in place.
 */
void VarMGCTM::RunEM(CorpusC &test, MGCTM* m) {
  MGSS ss;
  ss.CorpusInit(cor_, *m);
  MStep(ss, m);
  LOG(INFO) << m->pi.transpose();

  for (int iter = 0; iter < converged_.em_max_iter_; iter++) {
    // Per-document inference; each iteration writes only its own slots.
    std::vector<MGVar> vars(cor_.Len());
    VReal likelihoods(cor_.Len());
    #pragma omp parallel for
    for (size_t d = 0; d < cor_.Len(); d++) {
      likelihoods[d] = Infer(cor_.docs[d], *m, &vars[d]);
    }

    // Summed below but not read anywhere else in this function.
    double likelihood = 0;
    VStr etas(cor_.Len());
    ss.SetZero(m->GTopicNum(), m->LTopicNum1(), m->LTopicNum2(), m->TermNum());

    for (size_t doc_id = 0; doc_id < cor_.Len(); doc_id++) {
      DocC &doc = cor_.docs[doc_id];
      MGVar &var = vars[doc_id];

      // Global-topic statistics: count * g_z * (1 - delta).
      for (size_t n = 0; n < doc.ULen(); n++) {
        for (int k = 0; k < m->GTopicNum(); k++) {
          ss.g_topic(k, doc.Word(n)) +=
              doc.Count(n)*var.g_z(k, n)*(1 - var.delta[n]);
          ss.g_topic_sum[k] +=
              doc.Count(n)*var.g_z(k, n)*(1 - var.delta[n]);
        }
      }

      // Local-topic statistics (count * l_z * delta * eta) and the eta
      // totals that feed pi, one group j at a time.
      for (int j = 0; j < m->LTopicNum1(); j++) {
        for (size_t n = 0; n < doc.ULen(); n++) {
          for (int k = 0; k < m->LTopicNum2(); k++) {
            ss.l_topic[j](k, doc.Word(n)) +=
                doc.Count(n)*var.l_z[j](k, n)*var.delta[n]*var.eta[j];
            ss.l_topic_sum(k, j) +=
                doc.Count(n)*var.l_z[j](k, n)*var.delta[n]*var.eta[j];
          }
        }
        ss.pi[j] += var.eta[j];
      }

      etas[doc_id] = EVecToStr(var.eta);
      likelihood += likelihoods[doc_id];
    }

    MStep(ss, m);
    LOG(INFO) << m->pi.transpose();
    OutputFile(*m, Join(etas,"\n"), iter);
//    LOG(INFO) <<"perplexity: " <<Infer(test,*m);
  }
}
Пример #3
0
/*
 * Runs the MbICP matching loop: preprocess the two scans, then alternate
 * EStep() (correspondences) and MStep() (minimisation) for at most
 * params.MaxIter iterations.
 *
 * *numiter receives the zero-based index of the latest iteration whose
 * MStep() was executed; it is not written if no MStep() ran.
 *
 * Return value:
 *    1  MStep() returned 1
 *   -1  EStep() returned something other than 1
 *   -2  MStep() returned -1
 *    2  the loop ended after params.MaxIter iterations without either
 *       of the above
 */
int MbICPmatcher(Tpfp *laserK, Tpfp *laserK1,
		Tsc *sensorMotion, Tsc *solution, unsigned *numiter){

	int iteration;
	int estepStatus;
	int mstepStatus;

	// Preprocess both scans
	preProcessingLib(laserK,laserK1,sensorMotion);

	for (iteration=0; iteration<params.MaxIter; iteration++){

		// Compute the correspondences of the MbICP
		estepStatus=EStep();
		if (estepStatus!=1)
			return -1;

	#ifdef DRAW_PNG
		if (draw_iterations)
		{	// write associations and scans to disk so an external program can visualize them
			write_associations(iteration);
			//write_scans(iteration);
		}
	#endif

		// Minimize and compute the solution
		mstepStatus=MStep(solution);

		*numiter=iteration;

		if (mstepStatus==1)
			return 1;
		if (mstepStatus==-1)
			return -2;
		// any other status: go on to the next iteration
	}

	return 2;

}
Пример #4
0
  /**
   * Fits a K-component model to X by alternating EStep and MStep for
   * options.maxIter iterations, printing the value of calc_energy after
   * each iteration.
   *
   * The three parameter vectors are filled with random values and
   * normalised before the first iteration. The generator is
   * default-constructed, so it always starts from the same seed.
   *
   * @param X       one entry per document (N = X.size()); each entry is a
   *                list of (int, double) pairs — presumably (word index,
   *                weight), confirm against MStep.
   * @param dict    vocabulary; only its size (W) is used here.
   * @param K       number of components.
   * @param pw_z    output, initialised with K * W values.
   * @param pd_z    output, initialised with K * N values.
   * @param pz      output, initialised with K values.
   * @param options only options.maxIter is read here.
   */
  void train( const std::vector<std::vector<std::pair<int,double> > >& X,
              const std::vector<std::string> &dict,
              int K,
              std::vector<double> &pw_z, std::vector<double> &pd_z, std::vector<double> &pz,
              Options options )
  {
    typedef std::vector<double>::size_type size_type;

    std::mt19937 rng;
    
    int N = static_cast<int>( X.size() );
    int W = static_cast<int>( dict.size() );
    
    // init pz; data() is valid even if the vector is empty, unlike &pz[0]
    fullrand( pz, K, rng );
    normalize( pz.data(), K );

    // init pw_z
    fullrand( pw_z, K * W, rng );
    normalize_at0( pw_z, W );

    // init pd_z
    fullrand( pd_z, K * N, rng );
    normalize_at0( pd_z, N );
    
    // init pz_dw: the element count is computed in the vector's size type,
    // because N * W * K easily exceeds the range of int and signed overflow
    // is undefined behaviour.
    std::vector<double> pz_dw( static_cast<size_type>( N ) * W * K, 0.0 );

    for ( int iter=0; iter<options.maxIter; iter++ ) {
      EStep( N, W, K, pw_z, pd_z, pz, pz_dw );
      MStep( N, W, K, X, pw_z, pd_z, pz, pz_dw );
      printf( "iter %d: ernergy = %.6lf\n", iter, calc_energy( N, W, K, 
                                                               X,
                                                               pw_z,
                                                               pd_z,
                                                               pz ) );
    }
  }
Пример #5
0
// CEM - runs the full M-step / E-step / C-step loop until it settles.
//
// InputClass: if non-NULL, cluster assignments are loaded from it via LoadClu.
// Recurse:    if 0, neither cluster deletion nor splitting is attempted.
// InitRand:   if 0 (and InputClass is NULL), the assignments already stored
//             in the structure are used as the starting point.
//
// Returns the score computed after the last completed iteration.
float KK::CEM(const mxArray *InputClass/*= NULL*/, int Recurse /*=1*/, int InitRand /*=1*/)  {
	int nChanged;
	int LastStepFull; // whether the step that just finished was a full one
	int DidSplit;
	Array<int> OldClass(nPoints);
	float Score = HugeScore, OldScore;

	if (InputClass != NULL) {
		LoadClu(InputClass);
	} else if (InitRand) {
		// initialize assignments at random, or all to 0 when there is at most one starting cluster
		if (nStartingClusters > 1) {
			for (int p = 0; p < nPoints; p++) {
				Class[p] = irand(1, nStartingClusters-1);
			}
		} else {
			for (int p = 0; p < nPoints; p++) {
				Class[p] = 0;
			}
		}

		for (int c = 0; c < MaxPossibleClusters; c++) {
			ClassAlive[c] = (c < nStartingClusters);
		}
	}

	// bring the class bookkeeping up to date before iterating
	Reindex();

	// main loop
	int Iter = 0;
	FullStep = 1;
	do {
		// remember the current assignments so changes can be counted afterwards
		for (int p = 0; p < nPoints; p++) {
			OldClass[p] = Class[p];
		}

		// M-step - calculate class weights, means, and covariance matrices for each class
		MStep();

		// E-step - calculate scores for each point to belong to each class
		EStep();

		// dump distances if required
		//if (DistDump) MatPrint(Distfp, LogP.m_Data, DistDump, MaxPossibleClusters);

		// C-step - choose best class for each point
		CStep();

		// Would deleting any classes improve things?
		if (Recurse) {
			ConsiderDeletion();
		}

		// count the points whose class changed in this iteration
		nChanged = 0;
		for (int p = 0; p < nPoints; p++) {
			if (OldClass[p] != Class[p]) {
				nChanged++;
			}
		}

		// update the score (OldScore is kept but not consulted below)
		OldScore = Score;
		Score = ComputeScore();

		if (Verbose >= 1) {
			if (Recurse == 0) {
				Output("\t");
			}
			Output("Iteration %d%c: %d clusters Score %.7g nChanged %d\n",
				Iter, FullStep ? 'F' : 'Q', nClustersAlive, Score, nChanged);
		}

		Iter++;

		/*
		if (Debug) {
			for(p=0;p<nPoints;p++) BestClass[p] = Class[p];
			SaveOutput(BestClass);
			Output("Press return");
			getchar();
		}*/

		// Decide whether the next step is a full one. The order of the
		// tests is kept as is: the modulo is only evaluated when the
		// first two tests are false.
		LastStepFull = FullStep;
		FullStep = (
						nChanged > ChangedThresh*nPoints
						|| nChanged == 0
						|| Iter%FullStepEvery == 0
					//	|| Score > OldScore   -- doesn't help!
					//	Score decreases are not because of quick steps!
					);

		if (Iter > MaxIter) {
			Output("Maximum iterations exceeded\n");
			break;
		}

		// try splitting
		DidSplit = 0;
		if ((Recurse && SplitEvery>0) && (Iter%SplitEvery==SplitEvery-1 || (nChanged==0 && LastStepFull))) {
			DidSplit = TrySplits();
		}

	} while (nChanged > 0 || !LastStepFull || DidSplit);

	//if (DistDump) fprintf(Distfp, "\n");

	return Score;
}
Пример #6
0
//------------------------------------------
//------------------------------------------
// Refreshes this parameter for a cross-validation block by re-running the
// complete M step.
// NOTE(review): neither originalModel nor CVBlock is used in this body.
void XEMBinaryParameter::updateForCV(XEMModel * originalModel, XEMCVBlock & CVBlock){
  // Updates tabProportion, tabCenter and tabScatter.
  // MStep() is simply called to do that, even though it is somewhat slower
  // than a true incremental update would be.
  MStep();
}