double CyclicCoordinateDescent::getObjectiveFunction(int convergenceType) { if (convergenceType == GRADIENT) { double criterion = 0; if (useCrossValidation) { for (int i = 0; i < K; i++) { criterion += hXBeta[i] * hY[i] * hWeights[i]; } } else { for (int i = 0; i < K; i++) { criterion += hXBeta[i] * hY[i]; } } return static_cast<double> (criterion); } else if (convergenceType == MITTAL) { return getLogLikelihood(); } else if (convergenceType == LANGE) { return getLogLikelihood() + getLogPrior(); } else { std::ostringstream stream; stream << "Invalid convergence type: " << convergenceType; error->throwError(stream); } return 0.0; }
/*------------------------------------------- initRANDOM ---------- updated in this method : - _parameter - _tabFik and _tabSumF (because bestParameter is choose with the best LL which is computed with fik (and sumF) Note : _tabFik and sumF wil be 're'computed in the following EStep So only _parameter have to be updated in this method -------------------------------------------*/ void XEMModel::initRANDOM(int64_t nbTry) { // cout<<"init RANDOM, nbTryInInit="<<nbTry<<endl; _algoName = UNKNOWN_ALGO_NAME; int64_t i,k; double logLikelihood, bestLogLikelihood; XEMParameter * bestParameter = _parameter->clone(); bool * tabIndividualCanBeUsedForInitRandom = new bool[_nbSample]; for (i=0; i<_nbSample; i++) { tabIndividualCanBeUsedForInitRandom[i] = true; } bool * tabClusterToInitialize = new bool[_nbCluster]; for (k=0; k<_nbCluster; k++) { tabClusterToInitialize[k] = true; } // 1. InitForInitRandom //--------------------- _parameter->initForInitRANDOM(); // 1rst RANDOM //------------- randomForInitRANDOMorUSER_PARTITION(tabIndividualCanBeUsedForInitRandom, tabClusterToInitialize); // Compute log-likelihood logLikelihood = getLogLikelihood(true); // true : to compute fik bestLogLikelihood = logLikelihood; bestParameter->recopy(_parameter); /*cout<<"initRandom"<<endl<<"1er essai : "<<endl<<"Parameter : "<<endl; _parameter->edit(); cout<<"LL : "<< bestLogLikelihood<<endl;*/ // Others RANDOM for (i=1; i<nbTry; i++) { randomForInitRANDOMorUSER_PARTITION(tabIndividualCanBeUsedForInitRandom, tabClusterToInitialize); // Compute log-likelihood logLikelihood = getLogLikelihood(true); // true : to compute fik if (logLikelihood > bestLogLikelihood) { bestLogLikelihood = logLikelihood; bestParameter->recopy(_parameter); } /*cout<<endl<<"initRandom"<<endl<<i+1<<" eme essai : "<<endl<<"Parameter : "<<endl; _parameter->edit(); cout<<"LL : "<< logLikelihood<<endl;*/ } // set best parameter delete _parameter; _parameter = bestParameter; _parameter->setModel(this); 
/*cout<<endl<<"initRandom"<<endl<<"meilleur essai : "<<endl<<"Parameter : "<<endl; _parameter->edit(); cout<<"LL : "<< bestLogLikelihood<<endl;*/ //cout<<"fin de init RANDOM, nb d'essais effectues="<<i<<endl; delete [] tabIndividualCanBeUsedForInitRandom; delete [] tabClusterToInitialize; }
/**
 * Run one short EM chain: a single random initialisation followed by
 * E/M iterations until the stop rule of the initialisation strategy is met.
 *
 * Stop rules (clusteringStrategyInit->getStopName()):
 *  - NBITERATION         : stop once the iteration counter reaches getNbIteration();
 *  - EPSILON             : stop once the absolute log-likelihood change is not
 *                          greater than getEpsilon(), or once the counter
 *                          reaches maxNbIterationInInit (safety cap);
 *  - NBITERATION_EPSILON : stop once either of the two limits above
 *                          (getEpsilon() / getNbIteration()) is reached;
 *  - anything else       : throws internalMixmodError.
 *
 * At least two E/M passes are always executed (the initial one plus one
 * inside the loop).
 *
 * @param clusteringStrategyInit strategy supplying the stop rule and its limits
 * @param logLikelihood          output: log-likelihood of the final state
 */
void XEMModel::oneRunOfSmallEM(XEMClusteringStrategyInit * clusteringStrategyInit, double & logLikelihood) {
  // Starting point: one random draw and a first E/M pass.
  initRANDOM(1);
  Estep();
  Mstep();
  logLikelihood = getLogLikelihood(true); // true : to compute fik

  int64_t iterationCount = 1;
  bool keepIterating;

  do {
    const double previousLogLikelihood = logLikelihood;

    Estep();
    Mstep();
    ++iterationCount;

    // Decide whether another pass is required.
    switch (clusteringStrategyInit->getStopName()) {
    case NBITERATION : {
      keepIterating = (iterationCount < clusteringStrategyInit->getNbIteration());
      break;
    }
    case EPSILON : {
      logLikelihood = getLogLikelihood(true); // true : to compute fik
      const double gap = fabs(logLikelihood - previousLogLikelihood);
      // The extra bound keeps the number of iterations reasonable.
      keepIterating = (gap > clusteringStrategyInit->getEpsilon() && (iterationCount < maxNbIterationInInit));
      break;
    }
    case NBITERATION_EPSILON : {
      logLikelihood = getLogLikelihood(true); // true : to compute fik
      const double gap = fabs(logLikelihood - previousLogLikelihood);
      keepIterating = ((gap > clusteringStrategyInit->getEpsilon()) && (iterationCount < clusteringStrategyInit->getNbIteration()));
      break;
    }
    default :
      throw internalMixmodError;
    }
  } while (keepIterating);

  // In pure iteration-count mode the likelihood was not tracked inside the
  // loop, so it is evaluated once here because it is an output.
  if (clusteringStrategyInit->getStopName() == NBITERATION) {
    logLikelihood = getLogLikelihood(true); // true : to compute fik
  }
}
double TDistribution::getLikelihood(const Mat& p) const { // // Frequently fails w/ overflow problems when v is large. // double n1 = boost::math::tgamma((_v + _D) / 2.0, 0.0); // double d1 = pow(_v * M_PI, _D / 2.0); // double d2 = pow(determinant(_sigma), 0.5) * boost::math::tgamma(_v / 2.0, 0.0); // Mat n2 = (p - _mu) * _sigma.inv() * (p - _mu).t(); // cout << n2 << endl; // double result = (n1 / (d1 * d2)) * pow(1 + n2.at<double>(0) / _v, -(_v + _D) / 2.0); return exp(getLogLikelihood(p)); }
//----------------------------------------- // get completed LL (if CEM) or LL (elseif) //----------------------------------------- double XEMModel::getCompletedLogLikelihoodOrLogLikelihood() { double res = 0.; if (_algoName == UNKNOWN_ALGO_NAME) { throw internalMixmodError; } else { if (_algoName == CEM) { res = getCompletedLogLikelihood(); } else { res = getLogLikelihood(true); } } return res; }
/**
 * SEM_MAX initialisation: performs strategyInit->getNbIteration() independent
 * attempts, each made of a parameter reset, one random initialisation and a
 * single E / S / M pass, and keeps the parameter set with the highest
 * log-likelihood among the attempts that did not fail.
 *
 * An attempt that throws an XEMErrorType is discarded and the next one is
 * tried. If no attempt succeeds, SEM_MAX_error is thrown; the candidate
 * parameter cloned at entry is released first (it used to leak on that path).
 *
 * NOTE(review): initRANDOM() assigns UNKNOWN_ALGO_NAME to _algoName, so the
 * SEM value set at entry does not survive the first attempt — confirm that
 * this is intended.
 *
 * @param strategyInit supplies the number of attempts
 */
void XEMModel::initSEM_MAX(XEMStrategyInit * strategyInit) {
  _algoName = SEM;

  XEMParameter * bestParameter = _parameter->clone();
  double bestLogLikelihood = 0.0;
  int64_t nbRunOfSEMMAXOk = 0;

  for (int64_t j = 0; j < strategyInit->getNbIteration(); j++) {
    // Counted optimistically; undone in the handler when the attempt fails.
    nbRunOfSEMMAXOk++;
    try {
      _parameter->reset();
      initRANDOM(1);
      Estep();
      Sstep();
      Mstep();

      // Compute log-likelihood
      const double logLikelihood = getLogLikelihood(true); // true : to compute fik
      // The first successful attempt is always taken as the initial best.
      if ((nbRunOfSEMMAXOk == 1) || (logLikelihood > bestLogLikelihood)) {
        bestLogLikelihood = logLikelihood;
        bestParameter->recopy(_parameter);
      }
    }
    catch (XEMErrorType) {
      nbRunOfSEMMAXOk--;
    }
  }

  if (nbRunOfSEMMAXOk == 0) {
    // Nothing usable was produced: free the unused candidate before failing.
    delete bestParameter;
    throw SEM_MAX_error;
  }

  // set best parameter
  delete _parameter;
  _parameter = bestParameter;
  _parameter->setModel(this);
}
/* Les partitions donnees servent a calculer - les cik, les nk (dans fixKnownLabel) appel� dans le constructeur - les centres lorsque l'on a au moins un representant de la classe dans la initPartition (sinon on tire au hasard) En revanche, on ne les utilise pas pour les dispersions. On pourrait le faire si on a beaucoup d'information mais dans ce le cas ou l'on a peu d'information (ex : un seul individu pour une des classes), on ne peut pas calculer de disperion. On calcule donc la dispersion autour du centre (comme s'il y a vait une seule classe) dans le cas gaussien et on tire la dispersion au hasard dans le cas binaire. */ void XEMModel::initUSER_PARTITION(XEMPartition * initPartition, int64_t nbTryInInit) { _algoName = UNKNOWN_ALGO_NAME; int64_t nbInitializedCluster; bool * tabNotInitializedCluster = new bool[_nbCluster]; // 1. InitForUSER_PARTITION //------------------------- _parameter->initForInitUSER_PARTITION(nbInitializedCluster, tabNotInitializedCluster, initPartition); // 2.init random if needed //------------------------ if (nbInitializedCluster != _nbCluster) { // upadte tabIndividualCanBeUsedForInitRandom int64_t i, k; int64_t ** initLabelValue = initPartition->_tabValue; int64_t nbSampleCanBeUsedForInitRandom = _nbSample; bool * tabIndividualCanBeUsedForInitRandom = new bool[_nbSample]; for (i=0; i<_nbSample; i++) { tabIndividualCanBeUsedForInitRandom[i] = true; k=0; while (k<_nbCluster && tabIndividualCanBeUsedForInitRandom[i]) { if (initLabelValue[i][k]==1) { tabIndividualCanBeUsedForInitRandom[i] = false; nbSampleCanBeUsedForInitRandom--; } k++; } } if (nbSampleCanBeUsedForInitRandom < (_nbCluster - nbInitializedCluster)) { throw tooManySampleInInitPartitionAndTooManyClusterNotRepresented; } double logLikelihood, bestLogLikelihood; XEMParameter * bestParameter = _parameter->clone(); // 1rst random //------------- //cout<<"1rst random"<<endl; randomForInitRANDOMorUSER_PARTITION(tabIndividualCanBeUsedForInitRandom, tabNotInitializedCluster); // 
Compute log-likelihood logLikelihood = getLogLikelihood(true); // true : to compute fik bestLogLikelihood = logLikelihood; bestParameter->recopy(_parameter); /*cout<<"initRandom"<<endl<<"1er essai : "<<endl<<"Parameter : "<<endl; _parameter->edit(); cout<<"LL : "<< bestLogLikelihood<<endl;*/ // Others RANDOM //------------- for (i=1; i<nbTryInInit; i++) { // cout<<i+1<<" random"<<endl; randomForInitRANDOMorUSER_PARTITION(tabIndividualCanBeUsedForInitRandom, tabNotInitializedCluster); // Compute log-likelihood logLikelihood = getLogLikelihood(true); // true : to compute fik if (logLikelihood > bestLogLikelihood) { bestLogLikelihood = logLikelihood; bestParameter->recopy(_parameter); } /*cout<<endl<<"initRandom"<<endl<<i+1<<" eme essai : "<<endl<<"Parameter : "<<endl; _parameter->edit(); cout<<"LL : "<< logLikelihood<<endl;*/ } // set best parameter delete _parameter; _parameter = bestParameter; _parameter->setModel(this); /*cout<<endl<<"initRandom"<<endl<<"meilleur essai : "<<endl<<"Parameter : "<<endl; _parameter->edit(); cout<<"LL : "<< bestLogLikelihood<<endl;*/ delete [] tabIndividualCanBeUsedForInitRandom; } delete [] tabNotInitializedCluster; }
/**
 * Iterate coordinate-wise updates over all J coefficients until the chosen
 * convergence criterion drops below epsilon or maxIterations complete cycles
 * have been performed.
 *
 * On exit lastReturnFlag is one of SUCCESS, ILLCONDITIONED or MAX_ITERATIONS,
 * lastIterationCount holds the number of cycles performed, updateCount has
 * been incremented, and the cached Fisher information and variance are
 * marked as unknown.
 *
 * @param maxIterations   the loop stops after exactly this many cycles when
 *                        the criterion has not been met
 * @param convergenceType must lie in [GRADIENT, ZHANG_OLES]; any other value
 *                        is reported through error->throwError
 * @param epsilon         convergence threshold; the convergence test is only
 *                        active when epsilon > 0
 *
 * NOTE(review): the iteration cap is tested with `==`, so with
 * maxIterations <= 0 and epsilon <= 0 the loop has no exit condition —
 * confirm callers never pass such a combination.
 */
void CyclicCoordinateDescent::findMode(
		int maxIterations,
		int convergenceType,
		double epsilon
		) {

	// Reject criteria outside the supported range.
	if (convergenceType < GRADIENT || convergenceType > ZHANG_OLES) {
		std::ostringstream stream;
		stream << "Unknown convergence criterion: " << convergenceType;
		error->throwError(stream);
	}

	// Recompute the fixed terms when the weights are stale or Y was touched.
	if (!validWeights || hXI.getTouchedY() // || hXI.getTouchedX()
		) {
		computeNEvents();
		computeFixedTermsInLogLikelihood();
		computeFixedTermsInGradientAndHessian();
		validWeights = true;
		hXI.clean();
	}

	// A fresh XBeta invalidates the statistics derived from it.
	if (!xBetaKnown) {
		computeXBeta();
		xBetaKnown = true;
		sufficientStatisticsKnown = false;
	}

	if (!sufficientStatisticsKnown) {
		computeRemainingStatistics(true, 0); // TODO Check index?
		sufficientStatisticsKnown = true;
	}

	resetBounds();

	bool done = false;
	int iteration = 0;
	double lastObjFunc = 0.0;

	// Record the reference point for the first convergence comparison:
	// the objective value, or a snapshot of XBeta for ZHANG_OLES.
	if (convergenceType < ZHANG_OLES) {
		lastObjFunc = getObjectiveFunction(convergenceType);
	} else { // ZHANG_OLES
		saveXBeta();
	}

	while (!done) {

		// Do a complete cycle
		for(int index = 0; index < J; index++) {

			// Coefficients flagged in fixBeta are left untouched.
			if (!fixBeta[index]) {
				double delta = ccdUpdateBeta(index);
				delta = applyBounds(delta, index);
				// Only a non-zero step requires the statistics to be updated.
				if (delta != 0.0) {
					sufficientStatisticsKnown = false;
					updateSufficientStatistics(delta, index);
				}
			}

			// Progress message every 100 variables unless running quietly.
			if ( (noiseLevel > QUIET) && ((index+1) % 100 == 0)) {
				std::ostringstream stream;
				stream << "Finished variable " << (index+1);
				logger->writeLine(stream);
			}

		}

		iteration++;
//		bool checkConvergence = (iteration % J == 0 || iteration == maxIterations);
		bool checkConvergence = true; // Check after each complete cycle

		if (checkConvergence) {

			double conv;
			bool illconditioned = false;
			if (convergenceType < ZHANG_OLES) {
				double thisObjFunc = getObjectiveFunction(convergenceType);
				// A value that differs from itself is NaN: report it and force
				// the criterion to zero so that the loop can terminate.
				if (thisObjFunc != thisObjFunc) {
					std::ostringstream stream;
					stream << "\nWarning: problem is ill-conditioned for this choice of\n"
					       << "\t prior (" << jointPrior->getDescription() << ") or\n"
					       << "\t initial bounding box (" << initialBound << ")\n"
					       << "Enforcing convergence!";
					logger->writeLine(stream);
					conv = 0.0;
					illconditioned = true;
				} else {
					conv = computeConvergenceCriterion(thisObjFunc, lastObjFunc);
				}
				lastObjFunc = thisObjFunc;
			} else { // ZHANG_OLES
				conv = computeZhangOlesConvergenceCriterion();
				saveXBeta();
			} // Necessary to call getObjFxn or computeZO before getLogLikelihood,
			  // since these copy over XBeta

			double thisLogLikelihood = getLogLikelihood();
			double thisLogPrior = getLogPrior();
			double thisLogPost = thisLogLikelihood + thisLogPrior;

			// Message for this cycle; it is only written out when not QUIET.
			std::ostringstream stream;
			if (noiseLevel > QUIET) {
				stream << "\n";
				printVector(&hBeta[0], J, stream);
				stream << "\n";
				stream << "log post: " << thisLogPost
						<< " (" << thisLogLikelihood << " + " << thisLogPrior
						<< ") (iter:" << iteration << ") ";
			}

			// Convergence takes precedence over the iteration cap.
			if (epsilon > 0 && conv < epsilon) {
				if (illconditioned) {
					lastReturnFlag = ILLCONDITIONED;
				} else {
					if (noiseLevel > SILENT) {
						stream << "Reached convergence criterion";
					}
					lastReturnFlag = SUCCESS;
				}
				done = true;
			} else if (iteration == maxIterations) {
				if (noiseLevel > SILENT) {
					stream << "Reached maximum iterations";
				}
				done = true;
				lastReturnFlag = MAX_ITERATIONS;
			}
			if (noiseLevel > QUIET) {
				logger->writeLine(stream);
			}

			logger->yield();
		}
	}
	lastIterationCount = iteration;
	updateCount += 1;

	modelSpecifics.printTiming();

	// The coefficients may have moved, so derived quantities are stale.
	fisherInformationKnown = false;
	varianceKnown = false;
}
int FitNullModel(Matrix& mat_Xnull, Matrix& mat_y, const EigenMatrix& kinshipU, const EigenMatrix& kinshipS){ // type conversion Eigen::MatrixXf x; Eigen::MatrixXf y; G_to_Eigen(mat_Xnull, &x); G_to_Eigen(mat_y, &y); this->lambda = kinshipS.mat; const Eigen::MatrixXf& U = kinshipU.mat; // rotate this->ux = U.transpose() * x; this->uy = U.transpose() * y; // get beta, sigma2_g and delta // where delta = sigma2_e / sigma2_g double loglik[101]; int maxIndex = -1; double maxLogLik = 0; for (int i = 0; i <= 100; ++i ){ double d = exp(-10 + i * 0.2); getBetaSigma2(d); loglik[i] = getLogLikelihood(d); // fprintf(stderr, "%d\tdelta=%g\tll=%lf\n", i, delta, loglik[i]); if (std::isnan(loglik[i])) { continue; } if (maxIndex < 0 || loglik[i] > maxLogLik) { maxIndex = i; maxLogLik = loglik[i]; } } if (maxIndex < -1) { fprintf(stderr, "Cannot optimize\n"); return -1; } if (maxIndex == 0 || maxIndex == 100) { // on the boundary // do not try maximize it. } else { gsl_function F; F.function = goalFunction; F.params = this; Minimizer minimizer; double lb = exp(-10 + (maxIndex-1) * 0.2); double ub = exp(-10 + (maxIndex+1) * 0.2); double start = exp(-10 + maxIndex * 0.2); if (minimizer.minimize(F, start, lb, ub)) { // fprintf(stderr, "Minimization failed, fall back to initial guess.\n"); this->delta = start; } else { this->delta = minimizer.getX(); // fprintf(stderr, "minimization succeed when delta = %g, sigma2_g = %g\n", this->delta, this->sigma2_g); } } // store some intermediate results // fprintf(stderr, "maxIndex = %d, delta = %g, Try brent\n", maxIndex, delta); // fprintf(stderr, "beta[%d][%d] = %g\n", (int)beta.rows(), (int)beta.cols(), beta(0,0)); this->h2 = 1.0 /(1.0 + this->delta); this->sigma2 = this->sigma2_g * this->h2; // we derive different formular to replace original eqn (7) this->gamma = (this->lambda.array() / (this->lambda.array() + this->delta)).sum() / this->sigma2_g / (this->ux.rows() - 1 ) ; // fprintf(stderr, "gamma = %g\n", this->gamma); // transformedY = 
\Sigma^{-1} * (y_tilda) and y_tilda = y - X * \beta // since \Sigma = (\sigma^2_g * h^2 ) * (U * (\lambda + delta) * U') // transformedY = 1 / (\sigma^2_g * h^2 ) * (U * (\lambda+delta)^{-1} * U' * (y_tilda)) // = 1 / (\sigma^2_g * h^2 ) * (U * \lambda^{-1} * (uResid)) // since h^2 = 1 / (1+delta) // transformedY = (1 + delta/ (\sigma^2_g ) * (U * \lambda^{-1} * (uResid)) Eigen::MatrixXf resid = y - x * (x.transpose() * x).eval().ldlt().solve(x.transpose() * y); // this is y_tilda this->transformedY.noalias() = U.transpose() * resid; this->transformedY = (this->lambda.array() + this->delta).inverse().matrix().asDiagonal() * this->transformedY; this->transformedY = U * this->transformedY; this->transformedY /= this->sigma2_g; // fprintf(stderr, "transformedY(0,0) = %g\n", transformedY(0,0)); this->ySigmaY= (resid.array() * transformedY.array()).sum(); return 0; }
int FitNullModel(Matrix& mat_Xnull, Matrix& mat_y, const EigenMatrix& kinshipU, const EigenMatrix& kinshipS){ // sanity check if (mat_Xnull.rows != mat_y.rows) return -1; if (mat_Xnull.rows != kinshipU.mat.rows()) return -1; if (mat_Xnull.rows != kinshipS.mat.rows()) return -1; // type conversion G_to_Eigen(mat_Xnull, &this->ux); G_to_Eigen(mat_y, &this->uy); this->lambda = kinshipS.mat; const Eigen::MatrixXf& U = kinshipU.mat; // rotate this->ux = U.transpose() * this->ux; this->uy = U.transpose() * this->uy; // get beta, sigma and delta // where delta = sigma2_e / sigma2_g double loglik[101]; int maxIndex = -1; double maxLogLik = 0; for (int i = 0; i <= 100; ++i ){ delta = exp(-10 + i * 0.2); getBetaSigma2(delta); loglik[i] = getLogLikelihood(delta); #ifdef DEBUG fprintf(stderr, "%d\tdelta=%g\tll=%lf\n", i, delta, loglik[i]); fprintf(stderr, "beta(0)=%lf\tsigma2=%lf\n", beta(0), sigma2); #endif if (std::isnan(loglik[i])) { continue; } if (maxIndex < 0 || loglik[i] > maxLogLik) { maxIndex = i; maxLogLik = loglik[i]; } } if (maxIndex < -1) { fprintf(stderr, "Cannot optimize\n"); return -1; } #if 0 fprintf(stderr, "maxIndex = %d\tll=%lf\t\tbeta(0)=%lf\tsigma2=%lf\n", maxIndex, maxLogLik, beta(0), sigma2); #endif if (maxIndex == 0 || maxIndex == 100) { // on the boundary // do not try maximize it. 
} else { gsl_function F; F.function = goalFunction; F.params = this; Minimizer minimizer; double lb = exp(-10 + (maxIndex-1) * 0.2); double ub = exp(-10 + (maxIndex+1) * 0.2); double start = exp(-10 + maxIndex * 0.2); if (minimizer.minimize(F, start, lb, ub)) { fprintf(stderr, "Minimization failed, fall back to initial guess.\n"); this->delta = start; } else { this->delta = minimizer.getX(); #ifdef DEBUG fprintf(stderr, "minimization succeed when delta = %g, sigma2 = %g\n", this->delta, this->sigma2); #endif } } // store some intermediate results #ifdef DEBUG fprintf(stderr, "delta = sigma2_e/sigma2_g, and sigma2 is sigma2_g\n"); fprintf(stderr, "maxIndex = %d, delta = %g, Try brent\n", maxIndex, delta); fprintf(stderr, "beta[0][0] = %g\t sigma2_g = %g\tsigma2_e = %g\n", beta(0,0), this->sigma2, delta * sigma2); #endif // if (this->test == MetaCov::LRT) { // this->nullLikelihood = getLogLikelihood(this->delta); // } else if (this->test == MetaCov::SCORE) { // this->uResid = this->uy - this->ux * this->beta; // } return 0; }