/* Multivariate normal probability density function, on the log scale,
   up to a constant of proportionality */
double dmvnormln(const VectorXd &x, const VectorXd &mu, const MatrixXd &sigma)
{
    double pln = -Inf;
    if ( isposdef(sigma) ) {
        LLT<MatrixXd> Chol(sigma);
        MatrixXd L = Chol.matrixL();
        // -log(det(L)) = -0.5*log(det(sigma)), read off the Cholesky diagonal
        pln = -L.diagonal().array().log().sum();
        // quadratic form (x-mu)' * sigma^{-1} * (x-mu), via the factorization
        pln -= 0.5 * (x - mu).transpose() * Chol.solve(x - mu);
    }
    return pln;
}
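/* The function above assumes an `Inf` constant and an `isposdef` helper that
   are defined elsewhere in its source file. A minimal sketch of both (names
   kept from the snippet, implementations inferred), which would need to
   precede dmvnormln in practice: */
#include <Eigen/Dense>
#include <limits>
using Eigen::MatrixXd;
using Eigen::VectorXd;
using Eigen::LLT;

static const double Inf = std::numeric_limits<double>::infinity();

// Attempt an LLT factorization; Eigen reports NumericalIssue when the
// matrix is not (numerically) positive definite.
bool isposdef(const MatrixXd &m)
{
    LLT<MatrixXd> llt(m);
    return llt.info() == Eigen::Success;
}

// Example call with made-up values:
//   VectorXd x(2), mu(2);   x << 1.0, 2.0;  mu << 0.0, 0.0;
//   MatrixXd sigma(2, 2);   sigma << 2.0, 0.3, 0.3, 1.0;
//   double lp = dmvnormln(x, mu, sigma);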
TMatrixD Chol(TVectorD& covV, TVectorD& newSig)
{
    int nCov = covV.GetNrows();
    int n    = newSig.GetNrows();
    std::cout << nCov << " " << n << std::endl;
    // a packed lower triangle of an n x n matrix holds n*(n+1)/2 entries
    if ( nCov != n*(n+1)/2 ) {
        std::cout << "vecTest: mismatch in inputs" << std::endl;
        return TMatrixD();
    }
    //
    // create modified vector (replacing std.dev.s on the diagonal)
    //
    TVectorD newCovV(covV);
    int ind(0);
    for ( int i=0; i<n; ++i ) {
        for ( int j=0; j<=i; ++j ) {
            if ( j==i ) newCovV[ind] = newSig(i);
            ++ind;
        }
    }
    // Delegate to the single-argument overload. The original returned
    // Chol(newCovV, newSig) -- i.e. this very function -- which would
    // recurse forever.
    return Chol(newCovV);
}
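/* The delegation above assumes a one-argument overload Chol(TVectorD&) that
   unpacks the packed lower-triangle vector into a symmetric matrix and
   factorizes it; that overload is not shown in the original. A rough sketch
   using ROOT's TDecompChol, under that assumption: */
#include "TMatrixD.h"
#include "TVectorD.h"
#include "TDecompChol.h"
#include <cmath>
#include <iostream>

TMatrixD Chol(TVectorD& covV)
{
    int nCov = covV.GetNrows();
    // recover n from nCov = n*(n+1)/2
    int n = int((std::sqrt(8.0*nCov + 1.0) - 1.0) / 2.0);
    TMatrixD cov(n, n);
    int ind(0);
    for ( int i=0; i<n; ++i ) {
        for ( int j=0; j<=i; ++j ) {
            // NOTE: if the packed diagonal holds std. deviations rather than
            // variances (as the comment in the two-argument version suggests),
            // the i==j entries would need squaring here.
            cov(i, j) = covV(ind);
            cov(j, i) = covV(ind);
            ++ind;
        }
    }
    TDecompChol decomp(cov);
    if ( !decomp.Decompose() ) {
        std::cout << "Chol: matrix is not positive definite" << std::endl;
        return TMatrixD();
    }
    return decomp.GetU();  // upper-triangular Cholesky factor
}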
// E-step. Calculate Log Probs for each point to belong to each living class
// will delete a class if covariance matrix is singular
// also counts number of living classes
void KK::EStep()
{
    int p, c, cc, i;
    int nSkipped;
    float LogRootDet;              // log of square root of covariance determinant
    float Mahal;                   // Mahalanobis distance of point from cluster center
    Array<float> Chol(nDims2);     // to store Cholesky decomposition
    Array<float> Vec2Mean(nDims);  // stores data point minus class mean
    Array<float> Root(nDims);      // stores result of Chol*Root = Vec
    float *OptPtrLogP;
    int *OptPtrClass = Class.m_Data;
    int *OptPtrOldClass = OldClass.m_Data;

    nSkipped = 0;

    // start with cluster 0 - uniform distribution over space
    // because we have normalized all dims to 0...1, density will be 1.
    for (p=0; p<nPoints; p++)
        LogP[p*MaxPossibleClusters + 0] = (float)-log(Weight[0]);

    for (cc=1; cc<nClustersAlive; cc++) {
        c = AliveIndex[cc];

        // calculate Cholesky decomposition for class c
        if (Cholesky(Cov.m_Data+c*nDims2, Chol.m_Data, nDims)) {
            // If Cholesky returns 1, it means the matrix is not positive definite.
            // So kill the class.
            Output("Deleting class %d: covariance matrix is singular\n", c);
            ClassAlive[c] = 0;
            continue;
        }

        // LogRootDet is given by log of product of diagonal elements
        LogRootDet = 0;
        for (i=0; i<nDims; i++)
            LogRootDet += (float)log(Chol[i*nDims + i]);

        for (p=0; p<nPoints; p++) {
            // optimize for speed ...
            OptPtrLogP = LogP.m_Data + (p*MaxPossibleClusters);

            // to save time -- only recalculate if the last one was close
            if ( !FullStep
                 // Class[p] == OldClass[p]
                 // && LogP[p*MaxPossibleClusters+c] - LogP[p*MaxPossibleClusters+Class[p]] > DistThresh
                 && OptPtrClass[p] == OptPtrOldClass[p]
                 && OptPtrLogP[c] - OptPtrLogP[OptPtrClass[p]] > DistThresh
               ) {
                nSkipped++;
                continue;
            }

            // Compute Mahalanobis distance
            Mahal = 0;

            // calculate data minus class mean
            for (i=0; i<nDims; i++)
                Vec2Mean[i] = Data[p*nDims + i] - Mean[c*nDims + i];

            // calculate Root vector - by Chol*Root = Vec2Mean
            TriSolve(Chol.m_Data, Vec2Mean.m_Data, Root.m_Data, nDims);

            // add half of Root vector squared to log p
            for (i=0; i<nDims; i++)
                Mahal += Root[i]*Root[i];

            // Score is given by Mahal/2 + log RootDet - log weight
            OptPtrLogP[c] = Mahal/2
                            + LogRootDet
                            - log(Weight[c])
                            + (float)log(2*M_PI)*nDims/2;
            /*
            if (Debug) {
                if (p==0) {
                    Output("Cholesky\n");
                    MatPrint(stdout, Chol.m_Data, nDims, nDims);
                    Output("root vector:\n");
                    MatPrint(stdout, Root.m_Data, 1, nDims);
                    Output("First point's score = %.3g + %.3g - %.3g = %.3g\n",
                           Mahal/2, LogRootDet, log(Weight[c]),
                           LogP[p*MaxPossibleClusters + c]);
                }
            }
            */
        }
    }
    // Output("Skipped %d ", nSkipped);
}
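/* EStep() relies on two helpers that are not shown here: Cholesky(), which
   factorizes a row-major nDims x nDims matrix In into a lower-triangular
   factor Out and returns nonzero if the matrix is not positive definite,
   and TriSolve(), which solves M*Out = In by forward substitution. The
   sketches below are plausible reconstructions under those assumptions,
   not the original implementations: */
#include <math.h>

// Cholesky: In = Out * Out^T; returns 1 if In is not positive definite.
int Cholesky(float *In, float *Out, int D)
{
    int i, j, k;
    float sum;
    for (i = 0; i < D*D; i++) Out[i] = 0;
    for (i = 0; i < D; i++) {
        for (j = 0; j <= i; j++) {
            sum = In[i*D + j];
            for (k = 0; k < j; k++)
                sum -= Out[i*D + k] * Out[j*D + k];
            if (i == j) {
                if (sum <= 0) return 1;   // not positive definite
                Out[i*D + i] = (float)sqrt(sum);
            } else {
                Out[i*D + j] = sum / Out[j*D + j];
            }
        }
    }
    return 0;
}

// TriSolve: solve M*Out = In for Out, with M lower triangular.
void TriSolve(float *M, float *In, float *Out, int D)
{
    int i, j;
    float sum;
    for (i = 0; i < D; i++) {
        sum = In[i];
        for (j = 0; j < i; j++)
            sum -= M[i*D + j] * Out[j];
        Out[i] = sum / M[i*D + i];
    }
}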