void InferenceEngineBP::computeBeliefs(Beliefs &beliefs, FeatureGenerator *fGen, DataSequence *X, Model *m, int bComputePartition, int seqLabel, bool bUseStatePerNodes) { // Variable definition int xi, xj, nbNodes, seqLength; std::map<int,BPNode*> nodes; // tree graph std::map<int,BPNode*>::iterator itm; std::list<BPNode*>::iterator itl; BPNode* root; iMatrix adjMat; iVector nbStates; dVector* phi_i = 0; // singleton potentials dMatrix** phi_ij = 0; // pairwise potentials dVector** msg = 0; // messages if( m->isMultiViewMode() ) m->getAdjacencyMatrixMV(adjMat, X); else { uMatrix uAdjMat; m->getAdjacencyMatrix(uAdjMat, X); adjMat.resize(uAdjMat.getWidth(),uAdjMat.getHeight()); for(xi=0; xi<uAdjMat.getHeight(); xi++) for(xj=0; xj<uAdjMat.getWidth(); xj++) adjMat(xi,xj) = uAdjMat(xi,xj); } nbNodes = adjMat.getHeight(); seqLength = X->length(); // Create a vector that contains nbStates nbStates.create(nbNodes); for(xi=0; xi<nbNodes; xi++) nbStates[xi] = (m->isMultiViewMode()) ? m->getNumberOfStatesMV(xi/seqLength) : m->getNumberOfStates(); // Create BPGraph from adjMat for(xi=0; xi<nbNodes; xi++) { BPNode* v = new BPNode(xi, nbStates[xi]); nodes.insert( std::pair<int,BPNode*>(xi,v) ); } for(xi=0; xi<nbNodes; xi++) { for(xj=xi+1; xj<nbNodes; xj++) { if( !adjMat(xi,xj) ) continue; nodes[xi]->addNeighbor(nodes[xj]); nodes[xj]->addNeighbor(nodes[xi]); } } // Initialize initMessages(msg, X, m, adjMat, nbStates); initBeliefs(beliefs, X, m, adjMat, nbStates); initPotentials(phi_i, phi_ij, fGen, X, m, adjMat, nbStates, seqLabel); // Message update root = nodes[0]; // any node can be the root node { for(itl=root->neighbors.begin(); itl!=root->neighbors.end(); itl++) collect(root, *itl, phi_i, phi_ij, msg); for(itl=root->neighbors.begin(); itl!=root->neighbors.end(); itl++) distribute(root, *itl, phi_i, phi_ij, msg); } updateBeliefs(beliefs, phi_i, phi_ij, msg, X, m, adjMat); // Clean up for(xi=0; xi<nbNodes; xi++) { delete[] msg[xi]; msg[xi] = 0; delete[] phi_ij[xi]; phi_ij[xi] 
= 0; } delete[] msg; msg=0; delete[] phi_i; phi_i = 0; delete[] phi_ij; phi_ij = 0; for(itm=nodes.begin(); itm!=nodes.end(); itm++) delete (*itm).second; nodes.clear(); }
// Computes the negative-log-likelihood gradient contribution of one sequence
// for an HCRF, accumulating into vecGradient (negation of the overall gradient
// is handled by the caller, Gradient::ComputeGradient).
// Returns the per-sequence objective: log sum_y' Z(y'|x) - log Z(y|x).
double GradientHCRF::computeGradient(dVector& vecGradient, Model* m, DataSequence* X)
{
	int nbFeatures = pFeatureGen->getNumberOfFeatures();
	int NumSeqLabels=m->getNumberOfSequenceLabels();
	// Get adjacency matrix for this sequence's graph
	uMatrix adjMat;
	m->getAdjacencyMatrix(adjMat, X);
	// Make sure the output gradient vector has the right size
	if(vecGradient.getLength() != nbFeatures)
		vecGradient.create(nbFeatures);
	// Partition[y] holds log Z conditioned on sequence label y
	dVector Partition;
	Partition.resize(1,NumSeqLabels);
	std::vector<Beliefs> ConditionalBeliefs(NumSeqLabels);
	// Step 1 : Run Inference in each network to compute marginals conditioned on Y
	for(int i=0; i<NumSeqLabels; i++)
	{
		pInfEngine->computeBeliefs(ConditionalBeliefs[i],pFeatureGen, X, m, true,i);
		Partition[i] = ConditionalBeliefs[i].partition;
	}
	// Objective value: log sum_y' Z(y') - log Z(y_true)
	double f_value = Partition.logSumExp() - Partition[X->getSequenceLabel()];
	// Step 2: Compute expected values for feature nodes conditioned on Y
#if !defined(_VEC_FEATURES) && !defined(_OPENMP)
	featureVector* vecFeatures;
#endif
#if defined(_OPENMP)
	// Each OpenMP thread uses its own scratch feature vector
	int ThreadID = omp_get_thread_num();
	if (ThreadID >= nbThreadsMP) ThreadID = 0;
#else
	int ThreadID = 0;
#endif
	double value;
	// CEValues(y,k): expected value of feature k conditioned on sequence label y
	// NOTE(review): assumes dMatrix::resize zero-initializes — confirm against dMatrix.
	dMatrix CEValues;
	CEValues.resize(nbFeatures,NumSeqLabels);
	//Loop over nodes to compute features and update the gradient
	for(int j=0; j<NumSeqLabels; j++) { //For every label
		for(int i = 0; i < X->length(); i++) {//For every node
#if defined(_VEC_FEATURES) || defined(_OPENMP)
			pFeatureGen->getFeatures(vecFeaturesMP[ThreadID], X,m,i,-1,j);
			// Loop over features
			feature* pFeature = vecFeaturesMP[ThreadID].getPtr();
			for(int k = 0; k < vecFeaturesMP[ThreadID].size(); k++, pFeature++)
#else
			vecFeatures =pFeatureGen->getFeatures(X,m,i,-1,j);
			// Loop over features
			feature* pFeature = vecFeatures->getPtr();
			for(int k = 0; k < vecFeatures->size(); k++, pFeature++)
#endif
			{
				//p(s_i=s|x,Y) * f_k(i,s,x,y)
				value=ConditionalBeliefs[j].belStates[i][pFeature->nodeState] * pFeature->value;
				CEValues.setValue(j,pFeature->globalId, CEValues(j,pFeature->globalId) + value); // one row for each Y
			}// end for every feature
		}// end for every node
	}// end for every Sequence Label
	// Step 3: Compute expected values for edge features conditioned on Y
	//Loop over edges to compute features and update the gradient
	for(int j=0; j<NumSeqLabels; j++)
	{
		// edgeIndex enumerates edges in the same (row,col) order the beliefs use
		int edgeIndex = 0;
		for(int row = 0; row < X->length(); row++) // Loop over all rows (the previous node index)
		{
			for(int col = row; col < X->length() ; col++) //Loop over all columns (the current node index)
			{
				if(adjMat(row,col) == 1)
				{
					//Get nodes features
#if defined(_VEC_FEATURES) || defined(_OPENMP)
					pFeatureGen->getFeatures(vecFeaturesMP[ThreadID], X,m,col,row,j);
					// Loop over features
					feature* pFeature = vecFeaturesMP[ThreadID].getPtr();
					for(int k = 0; k < vecFeaturesMP[ThreadID].size(); k++, pFeature++)
#else
					vecFeatures = pFeatureGen->getFeatures(X,m,col,row,j);
					// Loop over features
					feature* pFeature = vecFeatures->getPtr();
					for(int k = 0; k < vecFeatures->size(); k++, pFeature++)
#endif
					{
						//p(y_i=s1,y_j=s2|x,Y)*f_k(i,j,s1,s2,x,y)
						value=ConditionalBeliefs[j].belEdges[edgeIndex](pFeature->prevNodeState,pFeature->nodeState) * pFeature->value;
						CEValues.setValue(j,pFeature->globalId, CEValues(j,pFeature->globalId) + value);
					}
					edgeIndex++;
				}
			}
		}
	}
	// Step 4: Compute Joint Expected Values, i.e. the mixture of the
	// label-conditioned expectations weighted by p(y|x) = exp(logZ_y - logSumExp)
	dVector JointEValues;
	JointEValues.resize(1,nbFeatures);
	JointEValues.set(0);
	dVector rowJ;
	rowJ.resize(1,nbFeatures);
	dVector GradientVector; // NOTE(review): declared but never used in this function
	double sumZLog=Partition.logSumExp();
	for (int j=0; j<NumSeqLabels; j++)
	{
		CEValues.getRow(j, rowJ);
		rowJ.multiply(exp(Partition.getValue(j)-sumZLog));
		JointEValues.add(rowJ);
	}
	// Step 5 Compute Gradient as Exi[i,*,*] -Exi[*,*,*], that is difference
	// between expected values conditioned on Sequence Labels and Joint expected
	// values
	CEValues.getRow(X->getSequenceLabel(), rowJ); // rowJ = expected value conditioned on sequence label Y
	// [Negation moved to Gradient::ComputeGradient by LP]
	// rowJ.negate();
	JointEValues.negate();
	rowJ.add(JointEValues);
	vecGradient.add(rowJ);
	return f_value;
}
void InferenceEngineLoopyBP::computeBeliefs(Beliefs &beliefs, FeatureGenerator *fGen, DataSequence *X, Model *m, int bComputePartition, int seqLabel, bool bUseStatePerNodes) { // Get adjacency matrix; values indicate edge index (use upper triangle only) iMatrix adjMat; if( m->isMultiViewMode() ) m->getAdjacencyMatrixMV(adjMat, X); else { // Quick and dirty, but I don't want to change Model::makeChain() int edgeID = 1; adjMat.create(X->length(),X->length()); for(int r=1; r<adjMat.getHeight(); r++) { adjMat(r,r-1) = edgeID; adjMat(r-1,r) = edgeID; edgeID++; } } int adjMatMax = adjMat.getMaxValue(); // 1. Initialize beliefs initializeBeliefs(beliefs, X, m, adjMat); // 2. Initialize messages std::vector<dVector> messages, prev_messages; initializeMessages(messages, X, m, adjMat, adjMatMax, false); for(unsigned int i=0; i<messages.size(); i++) { dVector v; prev_messages.push_back(v); } // 3. Initialize potentials Beliefs potentials; initializePotentials(potentials, fGen, X, m, adjMat, seqLabel, bUseStatePerNodes); // 4. Update loopy belief network int T = X->length(); int V = m->getNumberOfViews(); int nbNodes = V*T; int *messageUpdateOrder = new int[nbNodes]; for(int iter=0; iter<m_max_iter; iter++) { // Get message update order getRandomUpdateOrder(messageUpdateOrder, X, m); // Update messages for( int i=0; i<nbNodes; i++ ) { int xi = messageUpdateOrder[i]; for( int xj=0; xj<nbNodes; xj++ ) { if( !adjMat(xi,xj) ) continue; sendMessage(xi,xj,nbNodes,potentials,messages,adjMat,adjMatMax,m->isMaxMargin()); } } // Convergence check if( iter>0 ) { double error = 0; for(unsigned int i=0; i<messages.size(); i++) for(int j=0; j<messages[i].getLength(); j++) error += fabs(messages[i][j] - prev_messages[i][j]); if( error < m_min_threshold ) break; } // Copy messages for(unsigned int i=0; i<messages.size(); i++) prev_messages[i] = messages[i]; } // Compute beliefs & compute partition updateBeliefs(nbNodes, beliefs, potentials, messages, adjMat, adjMatMax); // 5. 
Clean up and Return if( messageUpdateOrder ) { delete[] messageUpdateOrder; messageUpdateOrder = 0; } }
// Computes the gradient contribution of one labeled sequence for a CRF,
// accumulating (observed features - expected features) into vecGradient.
// The overall negation is handled by the caller (Gradient::ComputeGradient).
// Returns the per-sequence objective: log Z(x) - score of the true labeling.
double GradientCRF::computeGradient(dVector& vecGradient, Model* m, DataSequence* X)
{
	//compute beliefs (node/edge marginals and the log-partition)
	Beliefs bel;
	pInfEngine->computeBeliefs(bel,pFeatureGen, X, m, false);
	// phi = un-normalized log score of the ground-truth labeling
	double phi = pFeatureGen->evaluateLabels(X,m);
	double partition = bel.partition;
	//Get adjacency matrix
	uMatrix adjMat;
	m->getAdjacencyMatrix(adjMat, X);
	//Check the size of vecGradient
	int nbFeatures = pFeatureGen->getNumberOfFeatures();
	if(vecGradient.getLength() != nbFeatures)
		vecGradient.create(nbFeatures);
#if !defined(_VEC_FEATURES) && !defined(_OPENMP)
	featureVector* vecFeatures;
#endif
#if defined(_OPENMP)
	// Each OpenMP thread uses its own scratch feature vector
	int ThreadID = omp_get_thread_num();
	if (ThreadID >= nbThreadsMP) ThreadID = 0;
#else
	int ThreadID = 0;
#endif
	//Loop over nodes to compute features and update the gradient
	for(int i = 0; i < X->length(); i++)
	{
		// Read the label for this state
		int s = X->getStateLabels(i);
		//Get nodes features
#if defined(_VEC_FEATURES) || defined(_OPENMP)
		pFeatureGen->getFeatures(vecFeaturesMP[ThreadID], X,m,i,-1);
		// Loop over features
		feature* pFeature = vecFeaturesMP[ThreadID].getPtr();
		for(int j = 0; j < vecFeaturesMP[ThreadID].size(); j++, pFeature++)
#else
		vecFeatures = pFeatureGen->getFeatures(X,m,i,-1);
		// Loop over features
		feature* pFeature = vecFeatures->getPtr();
		for(int j = 0; j < vecFeatures->size(); j++, pFeature++)
#endif
		{
			// If feature has same state label as the label from the
			// dataSequence, then add this to the gradient (observed count)
			if(pFeature->nodeState == s)
				vecGradient[pFeature->id] += pFeature->value;
			//p(y_i=s|x)*f_k(i,s,x) is subtracted from the gradient (expected count)
			vecGradient[pFeature->id] -= bel.belStates[i][pFeature->nodeState]*pFeature->value;
		}
	}
	//Loop over edges to compute features and update the gradient
	// edgeIndex enumerates edges in the same (row,col) order the beliefs use
	int edgeIndex = 0;
	for(int row = 0; row < X->length(); row++) // Loop over all rows (the previous node index)
	{
		for(int col = row; col < X->length() ; col++) //Loop over all columns (the current node index)
		{
			if(adjMat(row,col) == 1)
			{
				int s1 = X->getStateLabels(row);
				int s2 = X->getStateLabels(col);
				//Get nodes features
#if defined(_VEC_FEATURES) || defined(_OPENMP)
				pFeatureGen->getFeatures(vecFeaturesMP[ThreadID], X,m,col,row);
				// Loop over features
				feature* pFeature = vecFeaturesMP[ThreadID].getPtr();
				for(int j = 0; j < vecFeaturesMP[ThreadID].size(); j++, pFeature++)
#else
				vecFeatures = pFeatureGen->getFeatures(X,m,col,row);
				// Loop over features
				feature* pFeature = vecFeatures->getPtr();
				for(int j = 0; j < vecFeatures->size(); j++, pFeature++)
#endif
				{
					// ++ Forward edge ++
					// If edge feature has same state labels as the labels from the dataSequence, then add it to the gradient
					if(pFeature->nodeState == s2 && pFeature->prevNodeState == s1)
						vecGradient[pFeature->id] += pFeature->value;
					//p(y_i=s1,y_j=s2|x)*f_k(i,j,s1,s2,x) is subtracted from the gradient
					vecGradient[pFeature->id] -= bel.belEdges[edgeIndex](pFeature->prevNodeState,pFeature->nodeState)*pFeature->value;
				}
				edgeIndex++;
			}
		}
	}
	//Return -log instead of log() [Moved to Gradient::ComputeGradient by LP]
	// vecGradient.negate();
	return partition-phi;
}
double GradientMVHCRF::computeGradientMLE(dVector& vecGradient, Model* m, DataSequence* X) { double f_value=0; // return value //////////////////////////////////////////////////////////////////////////////////// // Step 1 : Run Inference in each network to compute marginals conditioned on Y int nbSeqLabels = m->getNumberOfSequenceLabels(); std::vector<Beliefs> condBeliefs(nbSeqLabels); dVector Partition(nbSeqLabels); for(int y=0; y<nbSeqLabels; y++) { pInfEngine->computeBeliefs(condBeliefs[y], pFeatureGen, X, m, true, y); Partition[y] = condBeliefs[y].partition;; } //////////////////////////////////////////////////////////////////////////////////// // Step 2 : Compute expected values for node/edge features conditioned on Y int nbFeatures = pFeatureGen->getNumberOfFeatures(); dMatrix condEValues(nbFeatures, nbSeqLabels); feature* f; featureVector vecFeatures; iMatrix adjMat; m->getAdjacencyMatrixMV(adjMat, X); int V = m->getNumberOfViews(); int T = X->length(); int nbNodes= V*T; double val; int y, k, xi, xj; for(y=0; y<nbSeqLabels; y++) { // Loop over nodes to compute features and update the gradient for(xi=0; xi<nbNodes; xi++) { pFeatureGen->getFeatures(vecFeatures,X,m,xi,-1,y); f = vecFeatures.getPtr(); for(k=0; k<vecFeatures.size(); k++, f++) { // p(h^v_t=a|x,y) * f_k(v,t,a,x,y) val = condBeliefs[y].belStates[xi][f->nodeState] * f->value; condEValues.addValue(y, f->globalId, val); } } // Loop over edges to compute features and update the gradient for(xi=0; xi<nbNodes; xi++) { for(xj=xi+1; xj<nbNodes; xj++) { if( !adjMat(xi,xj) ) continue; pFeatureGen->getFeatures(vecFeatures,X,m,xj,xi,y); f = vecFeatures.getPtr(); for(k=0; k<vecFeatures.size(); k++, f++) { // p(h^vi_ti=a,h^vj_tj=b|x,y) * f_k(vi,ti,vj,tj,x,y) val = condBeliefs[y].belEdges[adjMat(xi,xj)-1] (f->prevNodeState,f->nodeState) * f->value; condEValues.addValue(y, f->globalId, val); } } } } //////////////////////////////////////////////////////////////////////////////////// // Step 3: Compute Joint 
Expected Values dVector JointEValues(nbFeatures); dVector rowJ(nbFeatures); // expected value conditioned on seqLabel Y double sumZLog = Partition.logSumExp(); for (int y=0; y<nbSeqLabels; y++) { condEValues.getRow(y, rowJ); rowJ.multiply( exp(Partition[y]-sumZLog) ); JointEValues.add(rowJ); } //////////////////////////////////////////////////////////////////////////////////// // Step 4 Compute Gradient as Exi[i,*,*] - Exi[*,*,*], that is the difference between // expected values conditioned on seqLabel Y and joint expected values if( vecGradient.getLength() != nbFeatures ) vecGradient.create(nbFeatures); condEValues.getRow(X->getSequenceLabel(), rowJ); JointEValues.negate(); rowJ.add(JointEValues); vecGradient.add(rowJ); // MLE: return log(sum_y' p(y'|xi)) - log(p(yi|xi)}) f_value = Partition.logSumExp() - Partition[X->getSequenceLabel()]; return f_value; }