Example #1
void InferenceEngineBP::computeBeliefs(Beliefs &beliefs, FeatureGenerator *fGen, 
	DataSequence *X, Model *m, int bComputePartition, int seqLabel, bool bUseStatePerNodes)
{  
	// Variable definitions
	int xi, xj, nbNodes, seqLength;
	std::map<int,BPNode*> nodes; // tree graph
	std::map<int,BPNode*>::iterator itm;
	std::list<BPNode*>::iterator itl;
	BPNode* root;
	iMatrix adjMat;
	iVector nbStates;
	dVector*  phi_i  = 0; // singleton potentials
	dMatrix** phi_ij = 0; // pairwise potentials
	dVector** msg    = 0; // messages

	if( m->isMultiViewMode() )
		m->getAdjacencyMatrixMV(adjMat, X);
	else {
		uMatrix uAdjMat;
		m->getAdjacencyMatrix(uAdjMat, X);
		adjMat.resize(uAdjMat.getWidth(),uAdjMat.getHeight());
		for(xi=0; xi<uAdjMat.getHeight(); xi++)
			for(xj=0; xj<uAdjMat.getWidth(); xj++)
				adjMat(xi,xj) = uAdjMat(xi,xj);
	}

	nbNodes = adjMat.getHeight(); 
	seqLength = X->length();

	// Create a vector holding the number of states for each node
	nbStates.create(nbNodes);
	for(xi=0; xi<nbNodes; xi++)
		nbStates[xi] = (m->isMultiViewMode()) 
			? m->getNumberOfStatesMV(xi/seqLength) : m->getNumberOfStates();

	// Create BPGraph from adjMat
	for(xi=0; xi<nbNodes; xi++) {		
		BPNode* v = new BPNode(xi, nbStates[xi]);
		nodes.insert( std::pair<int,BPNode*>(xi,v) );
	}
	for(xi=0; xi<nbNodes; xi++) {
		for(xj=xi+1; xj<nbNodes; xj++) {
			if( !adjMat(xi,xj) ) continue;
			nodes[xi]->addNeighbor(nodes[xj]);
			nodes[xj]->addNeighbor(nodes[xi]);
		}
	}

	// Initialize  
	initMessages(msg, X, m, adjMat, nbStates);
	initBeliefs(beliefs, X, m, adjMat, nbStates);
	initPotentials(phi_i, phi_ij, fGen, X, m, adjMat, nbStates, seqLabel);
	
	// Message update
	root = nodes[0]; // any node can be the root node
	{
		for(itl=root->neighbors.begin(); itl!=root->neighbors.end(); itl++)
			collect(root, *itl, phi_i, phi_ij, msg);
		for(itl=root->neighbors.begin(); itl!=root->neighbors.end(); itl++)
			distribute(root, *itl, phi_i, phi_ij, msg);
	}
	updateBeliefs(beliefs, phi_i, phi_ij, msg, X, m, adjMat);

	// Clean up
	for(xi=0; xi<nbNodes; xi++) { 		
		delete[] msg[xi]; msg[xi] = 0; 
		delete[] phi_ij[xi]; phi_ij[xi] = 0;
	}
	delete[] msg; msg=0;
	delete[] phi_i; phi_i = 0;
	delete[] phi_ij;  phi_ij  = 0; 

	for(itm=nodes.begin(); itm!=nodes.end(); itm++) 
		delete (*itm).second; 
	nodes.clear();   
}
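
The collect/distribute pair above is the classic two-pass schedule for exact belief propagation on a tree: messages are first gathered from the leaves toward the root, then pushed back out to the leaves. A minimal sketch of that recursion, with a stand-in node type and hypothetical helper names (the library's actual collect()/distribute() also fold the potentials phi_i/phi_ij and the message buffers into each step):

#include <list>

// Illustrative stand-in for the library's BPNode (an assumption, not the real class).
struct Node {
	int id;
	std::list<Node*> neighbors;
};

// Upward pass: gather messages from every subtree below `child`,
// then send the combined message child -> parent.
void collectSketch(Node* parent, Node* child) {
	for (std::list<Node*>::iterator it = child->neighbors.begin();
	     it != child->neighbors.end(); ++it)
		if (*it != parent)
			collectSketch(child, *it);
	// here: compute message(child -> parent) from phi_i, phi_ij, and child's incoming messages
}

// Downward pass: send parent -> child, then recurse into the subtree.
void distributeSketch(Node* parent, Node* child) {
	// here: compute message(parent -> child)
	for (std::list<Node*>::iterator it = child->neighbors.begin();
	     it != child->neighbors.end(); ++it)
		if (*it != parent)
			distributeSketch(child, *it);
}

After both passes every edge carries its two directed messages, which is exactly what updateBeliefs needs to read off the node and edge beliefs locally.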
Example #2
double GradientHCRF::computeGradient(dVector& vecGradient, Model* m, DataSequence* X)
{
    int nbFeatures = pFeatureGen->getNumberOfFeatures();
    int NumSeqLabels=m->getNumberOfSequenceLabels();
    //Get adjacency matrix
    uMatrix adjMat;
    m->getAdjacencyMatrix(adjMat, X);
    if(vecGradient.getLength() != nbFeatures)
        vecGradient.create(nbFeatures);
    dVector Partition;
    Partition.resize(1,NumSeqLabels);
    std::vector<Beliefs> ConditionalBeliefs(NumSeqLabels);

    // Step 1 : Run Inference in each network to compute marginals conditioned on Y
    for(int i=0; i<NumSeqLabels; i++)
    {
        pInfEngine->computeBeliefs(ConditionalBeliefs[i],pFeatureGen, X, m, true,i);
        Partition[i] = ConditionalBeliefs[i].partition;
    }
    double f_value = Partition.logSumExp() - Partition[X->getSequenceLabel()];
    // Step 2: Compute expected values for feature nodes conditioned on Y
#if !defined(_VEC_FEATURES) && !defined(_OPENMP)
    featureVector* vecFeatures;
#endif
#if defined(_OPENMP)
    int ThreadID = omp_get_thread_num();
    if (ThreadID >= nbThreadsMP)
        ThreadID = 0;
#else
    int ThreadID = 0;
#endif
    double value;
    dMatrix CEValues;
    CEValues.resize(nbFeatures,NumSeqLabels);
    //Loop over nodes to compute features and update the gradient
    for(int j=0; j<NumSeqLabels; j++) { // for every sequence label
        for(int i = 0; i < X->length(); i++) { // for every node
#if defined(_VEC_FEATURES) || defined(_OPENMP)
            pFeatureGen->getFeatures(vecFeaturesMP[ThreadID], X,m,i,-1,j);
            // Loop over features
            feature* pFeature = vecFeaturesMP[ThreadID].getPtr();
            for(int k = 0; k < vecFeaturesMP[ThreadID].size(); k++, pFeature++)
#else
            vecFeatures =pFeatureGen->getFeatures(X,m,i,-1,j);
            // Loop over features
            feature* pFeature = vecFeatures->getPtr();
            for(int k = 0; k < vecFeatures->size(); k++, pFeature++)
#endif
            {
                //p(s_i=s|x,Y) * f_k(i,s,x,y)
                value=ConditionalBeliefs[j].belStates[i][pFeature->nodeState] * pFeature->value;
                CEValues.setValue(j,pFeature->globalId, CEValues(j,pFeature->globalId) + value); // one row for each Y
            }// end for every feature
        }// end for every node
    }// end for every sequence label
    // Step 3: Compute expected values for edge features conditioned on Y
    //Loop over edges to compute features and update the gradient
    for(int j=0; j<NumSeqLabels; j++) {
        int edgeIndex = 0;
        for(int row = 0; row < X->length(); row++) {
            // Loop over all rows (the previous node index)
            for(int col = row; col < X->length() ; col++) {
                //Loop over all columns (the current node index)
                if(adjMat(row,col) == 1) {
                    //Get nodes features
#if defined(_VEC_FEATURES) || defined(_OPENMP)
                    pFeatureGen->getFeatures(vecFeaturesMP[ThreadID], X,m,col,row,j);
                    // Loop over features
                    feature* pFeature = vecFeaturesMP[ThreadID].getPtr();
                    for(int k = 0; k < vecFeaturesMP[ThreadID].size(); k++, pFeature++)
#else
                    vecFeatures = pFeatureGen->getFeatures(X,m,col,row,j);
                    // Loop over features
                    feature* pFeature = vecFeatures->getPtr();
                    for(int k = 0; k < vecFeatures->size(); k++, pFeature++)
#endif
                    {
                        //p(y_i=s1,y_j=s2|x,Y)*f_k(i,j,s1,s2,x,y)
                        value=ConditionalBeliefs[j].belEdges[edgeIndex](pFeature->prevNodeState,pFeature->nodeState) * pFeature->value;
                        CEValues.setValue(j,pFeature->globalId, CEValues(j,pFeature->globalId) + value);
                    }
                    edgeIndex++;
                }
            }
        }
    }
    // Step 4: Compute Joint Expected Values
    dVector JointEValues;
    JointEValues.resize(1,nbFeatures);
    JointEValues.set(0);
    dVector rowJ;
    rowJ.resize(1,nbFeatures);
    dVector GradientVector;
    double sumZLog=Partition.logSumExp();
    for (int j=0; j<NumSeqLabels; j++)
    {
        CEValues.getRow(j, rowJ);
        rowJ.multiply(exp(Partition.getValue(j)-sumZLog));
        JointEValues.add(rowJ);
    }
    // Step 5: Compute the gradient as E[f|y,x] - E[f|x], i.e. the difference
    // between expected values conditioned on the sequence label and the joint
    // expected values
    CEValues.getRow(X->getSequenceLabel(), rowJ); // rowJ = expected value conditioned on sequence label Y
    // [Negation moved to Gradient::ComputeGradient by LP]
//	 rowJ.negate();
    JointEValues.negate();
    rowJ.add(JointEValues);
    vecGradient.add(rowJ);
    return f_value;
}
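
For reference, the quantities assembled in GradientHCRF::computeGradient are the negative conditional log-likelihood of an HCRF and its gradient. Taking Partition[y] = ConditionalBeliefs[y].partition to be the log-partition of the network clamped to sequence label y (as the logSumExp arithmetic implies), the returned value and the accumulated vector correspond to:

$$
f_{\text{value}} \;=\; \log\sum_{y'} Z(y'\mid x) \;-\; \log Z(y\mid x) \;=\; -\log p(y\mid x)
$$

$$
\frac{\partial \log p(y\mid x)}{\partial \theta_k}
\;=\; \underbrace{\mathbb{E}_{p(h\mid y,x)}[f_k]}_{\text{CEValues, row } y}
\;-\; \underbrace{\sum_{y'} p(y'\mid x)\,\mathbb{E}_{p(h\mid y',x)}[f_k]}_{\text{JointEValues}}
$$

The mixture weights p(y'|x) = exp(Partition[y'] - sumZLog) are exactly the factors applied to each row in Step 4, and the overall sign flip is deferred to Gradient::ComputeGradient as noted in the comments.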
Example #3
void InferenceEngineLoopyBP::computeBeliefs(Beliefs &beliefs, FeatureGenerator *fGen,
                                            DataSequence *X, Model *m, int bComputePartition, int seqLabel, bool bUseStatePerNodes)
{
  // Get adjacency matrix; values indicate edge index (use upper triangle only)
  iMatrix adjMat;
  if( m->isMultiViewMode() )
    m->getAdjacencyMatrixMV(adjMat, X);
  else {
    // Quick and dirty, but I don't want to change Model::makeChain()
    int edgeID = 1;
    adjMat.create(X->length(),X->length());
    for(int r=1; r<adjMat.getHeight(); r++) {
      adjMat(r,r-1) = edgeID;
      adjMat(r-1,r) = edgeID;
      edgeID++;
    }
  }
  int adjMatMax = adjMat.getMaxValue();
  
  // 1. Initialize beliefs
  initializeBeliefs(beliefs, X, m, adjMat);
  
  // 2. Initialize messages
  std::vector<dVector> messages, prev_messages;
  initializeMessages(messages, X, m, adjMat, adjMatMax, false);
  for(unsigned int i=0; i<messages.size(); i++) {
    dVector v; prev_messages.push_back(v);
  }
  
  // 3. Initialize potentials
  Beliefs potentials;
  initializePotentials(potentials, fGen, X, m, adjMat, seqLabel, bUseStatePerNodes);
  
  // 4. Update loopy belief network
  int T = X->length();
  int V = m->getNumberOfViews();
  int nbNodes = V*T;
  
  int *messageUpdateOrder = new int[nbNodes];
  
  for(int iter=0; iter<m_max_iter; iter++)
  {
    // Get message update order
    getRandomUpdateOrder(messageUpdateOrder, X, m);
    
    // Update messages
    for( int i=0; i<nbNodes; i++ ) {
      int xi = messageUpdateOrder[i];
      for( int xj=0; xj<nbNodes; xj++ ) {
        if( !adjMat(xi,xj) ) continue;
        sendMessage(xi,xj,nbNodes,potentials,messages,adjMat,adjMatMax,m->isMaxMargin());
      }
    }
    
    // Convergence check
    if( iter>0 ) {
      double error = 0;
      for(unsigned int i=0; i<messages.size(); i++)
        for(int j=0; j<messages[i].getLength(); j++)
          error += fabs(messages[i][j] - prev_messages[i][j]);
      if( error < m_min_threshold ) break;
    }
    
    // Copy messages
    for(unsigned int i=0; i<messages.size(); i++)
      prev_messages[i] = messages[i];
  }
  
  // Compute beliefs & compute partition
  updateBeliefs(nbNodes, beliefs, potentials, messages, adjMat, adjMatMax);
  
  // 5. Clean up and Return
  if( messageUpdateOrder ) {
    delete[] messageUpdateOrder; messageUpdateOrder = 0;
  }
}
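
getRandomUpdateOrder supplies the node ordering for each sweep; randomizing the schedule is a common way to help asynchronous loopy BP converge. A minimal sketch of one way such an ordering can be produced (an assumption about its behavior, not the library's implementation):

#include <algorithm>
#include <numeric>
#include <random>
#include <vector>

// Fill `order` (length nbNodes) with a random permutation of 0..nbNodes-1,
// so that messages are updated in a different node order on every sweep.
void randomUpdateOrder(int* order, int nbNodes, std::mt19937& rng) {
  std::vector<int> idx(nbNodes);
  std::iota(idx.begin(), idx.end(), 0);      // 0, 1, ..., nbNodes-1
  std::shuffle(idx.begin(), idx.end(), rng); // random permutation
  std::copy(idx.begin(), idx.end(), order);
}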
Example #4
double GradientCRF::computeGradient(dVector& vecGradient, Model* m, 
									DataSequence* X)
{
	//compute beliefs
	Beliefs bel;
	pInfEngine->computeBeliefs(bel,pFeatureGen, X, m, false);
	double phi = pFeatureGen->evaluateLabels(X,m);
	double partition = bel.partition;
	//Get adjacency matrix
	uMatrix adjMat;
	m->getAdjacencyMatrix(adjMat, X);
	//Check the size of vecGradient
	int nbFeatures = pFeatureGen->getNumberOfFeatures();
	if(vecGradient.getLength() != nbFeatures)
		vecGradient.create(nbFeatures);
#if !defined(_VEC_FEATURES) && !defined(_OPENMP)
	featureVector* vecFeatures;
#endif
#if defined(_OPENMP)
	int ThreadID = omp_get_thread_num();
	if (ThreadID >= nbThreadsMP)
		ThreadID = 0;
#else
	int ThreadID = 0;
#endif

	//Loop over nodes to compute features and update the gradient
	for(int i = 0; i < X->length(); i++)
	{
		// Read the label for this state
		int s = X->getStateLabels(i);
		//Get nodes features
#if defined(_VEC_FEATURES) || defined(_OPENMP)
		pFeatureGen->getFeatures(vecFeaturesMP[ThreadID], X,m,i,-1);
		// Loop over features
		feature* pFeature = vecFeaturesMP[ThreadID].getPtr();
		for(int j = 0; j < vecFeaturesMP[ThreadID].size(); j++, pFeature++)
#else
		vecFeatures = pFeatureGen->getFeatures(X,m,i,-1);
		// Loop over features
		feature* pFeature = vecFeatures->getPtr();
		for(int j = 0; j < vecFeatures->size(); j++, pFeature++)
#endif
		{

			// If feature has same state label as the label from the
			// dataSequence, then add this to the gradient
			if(pFeature->nodeState == s)
				vecGradient[pFeature->id] += pFeature->value;
			//p(y_i=s|x)*f_k(i,s,x) is subtracted from the gradient 
			vecGradient[pFeature->id] -= bel.belStates[i][pFeature->nodeState]*pFeature->value;
		}
	}
	//Loop over edges to compute features and update the gradient
	int edgeIndex = 0;
	for(int row = 0; row < X->length(); row++) // Loop over all rows (the previous node index)
	{
		for(int col = row; col < X->length() ; col++) //Loop over all columns (the current node index)
		{
			if(adjMat(row,col) == 1)
			{
				int s1 = X->getStateLabels(row);
				int s2 = X->getStateLabels(col);

				//Get nodes features
#if defined(_VEC_FEATURES) || defined(_OPENMP)
				pFeatureGen->getFeatures(vecFeaturesMP[ThreadID], X,m,col,row);
				// Loop over features
				feature* pFeature = vecFeaturesMP[ThreadID].getPtr();
				for(int j = 0; j < vecFeaturesMP[ThreadID].size(); j++, pFeature++)
#else
				vecFeatures = pFeatureGen->getFeatures(X,m,col,row);
				// Loop over features
				feature* pFeature = vecFeatures->getPtr();
				for(int j = 0; j < vecFeatures->size(); j++, pFeature++)
#endif
				{
					// ++ Forward edge ++
					// If edge feature has same state labels as the labels from the dataSequence, then add it to the gradient
					if(pFeature->nodeState == s2 && pFeature->prevNodeState == s1)
						vecGradient[pFeature->id] += pFeature->value;

					//p(y_i=s1,y_j=s2|x)*f_k(i,j,s1,s2,x) is subtracted from the gradient 
					vecGradient[pFeature->id] -= bel.belEdges[edgeIndex](pFeature->prevNodeState,pFeature->nodeState)*pFeature->value;
				}
				edgeIndex++;
			}
		}
	}
	//Return -log instead of log() [Moved to Gradient::ComputeGradient by LP]
//	vecGradient.negate();
	return partition-phi;
}
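
The bookkeeping above is the standard maximum-likelihood gradient for a CRF. Reading phi as the log-domain score of the labeled sequence returned by evaluateLabels and partition as log Z(x), the function returns the negative log-likelihood, and each feature contributes its observed value minus its model expectation (the final negation again being deferred to Gradient::ComputeGradient):

$$
-\log p(\mathbf{y}\mid\mathbf{x}) \;=\; \log Z(\mathbf{x}) \;-\; \sum_k \theta_k f_k(\mathbf{x},\mathbf{y}) \;=\; \texttt{partition} - \texttt{phi}
$$

$$
\frac{\partial \log p(\mathbf{y}\mid\mathbf{x})}{\partial \theta_k}
\;=\; f_k(\mathbf{x},\mathbf{y}) \;-\; \mathbb{E}_{p(\mathbf{y}'\mid\mathbf{x})}[f_k(\mathbf{x},\mathbf{y}')]
$$

The expectation is read off the node beliefs bel.belStates and the edge beliefs bel.belEdges, matching the two subtraction lines in the loops above.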
Example #5
double GradientMVHCRF::computeGradientMLE(dVector& vecGradient, Model* m, DataSequence* X)
{    
	double f_value=0; // return value

	////////////////////////////////////////////////////////////////////////////////////
	// Step 1 : Run Inference in each network to compute marginals conditioned on Y
 	int nbSeqLabels = m->getNumberOfSequenceLabels();
	std::vector<Beliefs> condBeliefs(nbSeqLabels);	
	dVector Partition(nbSeqLabels);
	 
	for(int y=0; y<nbSeqLabels; y++) 
	{ 
		pInfEngine->computeBeliefs(condBeliefs[y], pFeatureGen, X, m, true, y);
		Partition[y] = condBeliefs[y].partition;
	} 
	
	////////////////////////////////////////////////////////////////////////////////////
	// Step 2 : Compute expected values for node/edge features conditioned on Y
	int nbFeatures = pFeatureGen->getNumberOfFeatures();
	dMatrix condEValues(nbFeatures, nbSeqLabels);
	
	feature* f;
	featureVector vecFeatures;

	iMatrix adjMat;
	m->getAdjacencyMatrixMV(adjMat, X);
	
	int V = m->getNumberOfViews();
	int T = X->length(); 
	int nbNodes= V*T;

	double val;
	int y, k, xi, xj;
	
	for(y=0; y<nbSeqLabels; y++) 
	{ 
		// Loop over nodes to compute features and update the gradient
		for(xi=0; xi<nbNodes; xi++) {
			pFeatureGen->getFeatures(vecFeatures,X,m,xi,-1,y);			
			f = vecFeatures.getPtr();						
			for(k=0; k<vecFeatures.size(); k++, f++) {  				
				// p(h^v_t=a|x,y) * f_k(v,t,a,x,y)
				val = condBeliefs[y].belStates[xi][f->nodeState] * f->value;
				condEValues.addValue(y, f->globalId, val);
			} 
		} 

		// Loop over edges to compute features and update the gradient
		for(xi=0; xi<nbNodes; xi++) {
			for(xj=xi+1; xj<nbNodes; xj++) {
				if( !adjMat(xi,xj) ) continue;
				pFeatureGen->getFeatures(vecFeatures,X,m,xj,xi,y);
				f = vecFeatures.getPtr();				
				for(k=0; k<vecFeatures.size(); k++, f++) {
					// p(h^vi_ti=a,h^vj_tj=b|x,y) * f_k(vi,ti,vj,tj,x,y)
					val = condBeliefs[y].belEdges[adjMat(xi,xj)-1]
							(f->prevNodeState,f->nodeState) * f->value;
					condEValues.addValue(y, f->globalId, val);
				} 
			} 
		} 	
	} 

	////////////////////////////////////////////////////////////////////////////////////
	// Step 3: Compute Joint Expected Values
	dVector JointEValues(nbFeatures);
	dVector rowJ(nbFeatures);  // expected value conditioned on seqLabel Y
	double sumZLog = Partition.logSumExp();
	for (int y=0; y<nbSeqLabels; y++) 
	{
		condEValues.getRow(y, rowJ);
		rowJ.multiply( exp(Partition[y]-sumZLog) );
		JointEValues.add(rowJ);
	}
	
	////////////////////////////////////////////////////////////////////////////////////
	// Step 4: Compute the gradient as E[f|y,x] - E[f|x], i.e. the difference between
	// expected values conditioned on the sequence label Y and the joint expected values
	if( vecGradient.getLength() != nbFeatures )
		vecGradient.create(nbFeatures);

	condEValues.getRow(X->getSequenceLabel(), rowJ); 
	JointEValues.negate();
	rowJ.add(JointEValues);
	vecGradient.add(rowJ);  

	// MLE: return log(sum_y' Z(y'|x)) - log Z(y|x) = -log p(y|x)
	f_value = Partition.logSumExp() - Partition[X->getSequenceLabel()]; 
	return f_value;
}
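
Both HCRF gradient routines lean on Partition.logSumExp() to combine per-label log-partitions without overflow. A self-contained sketch of the standard max-shifted computation it presumably performs (shown for reference; hypothetical rather than taken from the library):

#include <algorithm>
#include <cmath>
#include <vector>

// Numerically stable log(sum_i exp(a_i)) for a non-empty vector:
// shifting by the max keeps every exponent <= 0, so exp() cannot overflow.
double logSumExp(const std::vector<double>& a) {
	double m = *std::max_element(a.begin(), a.end());
	double s = 0.0;
	for (size_t i = 0; i < a.size(); ++i)
		s += std::exp(a[i] - m);
	return m + std::log(s);
}

With this, exp(Partition[y] - sumZLog) is a proper probability p(y|x) in [0,1] even when the individual log-partitions are large.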