void CMlLearningEngine::Learn() { /* function takes an information from m_pEvidences and learns factors of graphical model using prior probabilities or not */ float logLikTmp = 0; if(!m_pGrModel) { PNL_THROW( CNULLPointer, "no graphical model") } CStaticGraphicalModel *grmodel = this->GetStaticModel(); CFactor *parameter = NULL; int numberOfDomains = grmodel -> GetNumberOfFactors(); for( int domainNodes = 0; domainNodes < numberOfDomains; domainNodes++ ) { factor = grmodel->GetFactor( domainNodes ); factor ->UpdateStatisticsML( &m_Vector_pEvidences.front(), m_Vector_pEvidences.size() ); PNL_CHECK_LEFT_BORDER(m_numberOfAllEvidences, 1); logLikTmp += parameter->ProcessingStatisticalData(m_numberOfAllEvidences); } switch( grmodel -> GetModelType() ) { case mtBNet: { break; } case mtMRF2: case mtMNet: { logLikTmp = _LearnPotentials(); break; } default: { PNL_THROW(CBadConst, "model type" ) break; } } m_critValue.push_back(logLikTmp); }
void CParEMLearningEngine::Learn() { CStaticGraphicalModel *pGrModel = this->GetStaticModel(); PNL_CHECK_IS_NULL_POINTER(pGrModel); PNL_CHECK_LEFT_BORDER(GetNumEv() - GetNumberProcEv() , 1); CJtreeInfEngine *pCurrentInfEng = NULL; CFactor *parameter = NULL; int exit = 0; int numberOfParameters = pGrModel->GetNumberOfParameters(); int domainNodes; int infIsNeed = 0; int itsML = 0; // !!! float loglik = -FLT_MAX; float loglikOld = -FLT_MAX; float epsilon = GetPrecisionEM(); float stopExpression = epsilon + 1.0f; int iteration = 0; int currentEvidNumber; int bMaximize = 0; int bSumOnMixtureNode = 0; const CEvidence* pCurrentEvid; int start_mpi, finish_mpi; int NumberOfProcesses, MyRank; int numSelfEvidences; MPI_Comm_size(MPI_COMM_WORLD, &NumberOfProcesses); MPI_Comm_rank(MPI_COMM_WORLD, &MyRank); int d = 0; do { iteration++; numSelfEvidences = (GetNumEv() - GetNumberProcEv()) / NumberOfProcesses; start_mpi = GetNumberProcEv() + numSelfEvidences * MyRank; // !!! if (MyRank < NumberOfProcesses - 1) finish_mpi = start_mpi + numSelfEvidences; // !!! else finish_mpi = GetNumEv(); // !!! for(int ev = start_mpi; ev < finish_mpi; ev++) { infIsNeed = 0; currentEvidNumber = ev; // !!! pCurrentEvid = m_Vector_pEvidences[currentEvidNumber]; if( !pCurrentEvid) { PNL_THROW(CNULLPointer, "evidence") } infIsNeed = !GetObsFlags(ev)->empty(); // !!! if(infIsNeed) { // create inference engine if(!pCurrentInfEng) { pCurrentInfEng = CJtreeInfEngine::Create(pGrModel); } pCurrentInfEng->EnterEvidence(pCurrentEvid, bMaximize, bSumOnMixtureNode); } for(domainNodes = 0; domainNodes < numberOfParameters; domainNodes++) { parameter = pGrModel->GetFactor(domainNodes); if(infIsNeed) { int DomainSize; const int *domain; parameter->GetDomain(&DomainSize, &domain); if (IsDomainObserved(DomainSize, domain, currentEvidNumber)) { const CEvidence *pEvidences[] = { pCurrentEvid }; parameter->UpdateStatisticsML(pEvidences, 1); } else { pCurrentInfEng->MarginalNodes(domain, DomainSize, 1); const CPotential * pMargPot = pCurrentInfEng->GetQueryJPD(); parameter ->UpdateStatisticsEM(pMargPot, pCurrentEvid); } } else { const CEvidence *pEvidences[] = { pCurrentEvid }; parameter->UpdateStatisticsML(pEvidences, 1); } } itsML = itsML || !infIsNeed; } for(domainNodes = 0; domainNodes < numberOfParameters; domainNodes++ ) { parameter = pGrModel->GetFactor(domainNodes); CNumericDenseMatrix<float> *matForSending; int matDim; const int *pMatRanges; int dataLength; const float *pDataForSending; matForSending = static_cast<CNumericDenseMatrix<float>*> ((parameter->GetDistribFun())->GetStatisticalMatrix(stMatTable)); matForSending->GetRanges(&matDim, &pMatRanges); matForSending->GetRawData(&dataLength, &pDataForSending); float *pDataRecv = new float[dataLength]; float *pDataRecv_copy = new float[dataLength]; MPI_Status status; MPI_Allreduce((void*)pDataForSending, pDataRecv, dataLength, MPI_FLOAT, MPI_SUM, MPI_COMM_WORLD); CNumericDenseMatrix<float> *RecvMatrix = static_cast<CNumericDenseMatrix<float>*> (parameter->GetDistribFun()->GetStatisticalMatrix(stMatTable)); int dataLength_new; float *pData_new; RecvMatrix->GetRawData(&dataLength_new, (const float**)(&pData_new)); for(int t=0;t<dataLength_new;t++) pData_new[t]=pDataRecv[t]; } switch (pGrModel->GetModelType()) { case mtBNet: { loglikOld = loglik; loglik = 0.0f; for(domainNodes = 0; domainNodes < numberOfParameters; domainNodes++) { parameter = pGrModel->GetFactor(domainNodes); loglik += parameter->ProcessingStatisticalData(m_numberOfAllEvidences); } break; } case mtMRF2: case mtMNet: { loglikOld = loglik; loglik = _LearnPotentials(); break; } default: { PNL_THROW(CBadConst, "model type") break; } } stopExpression = float(fabs(2 * (loglikOld - loglik) / (loglikOld + loglik))); exit = ((stopExpression > epsilon) && (iteration <= GetMaxIterEM())) && !itsML; if(exit) { ClearStatisticData(); } delete pCurrentInfEng; pCurrentInfEng = NULL; }while(exit); if(iteration > GetMaxIterEM()) { PNL_THROW(CNotConverged, "maximum number of iterations") } SetNumProcEv( GetNumEv() ); }
void CEMLearningEngine::LearnExtraCPDs(int nMaxFamily, pCPDVector* additionalCPDs, floatVector* additionalLLs) { CStaticGraphicalModel *pGrModel = this->GetStaticModel(); PNL_CHECK_IS_NULL_POINTER(pGrModel); PNL_CHECK_LEFT_BORDER(GetNumEv(), 1); int numberOfFactors = pGrModel->GetNumberOfFactors(); int numberOfAddFactors = additionalCPDs->size(); additionalLLs->resize(numberOfAddFactors); additionalLLs->clear(); m_vFamilyLogLik.resize(numberOfFactors); float loglik = 0.0f, ll; int i, ev; int iteration = 0; const CEvidence* pEv; CFactor *factor = NULL; int nnodes; const int * domain; bool bInfIsNeed; CInfEngine *pInfEng = m_pInfEngine; if (IsAllObserved()) { for (i = 0; i < numberOfFactors; i++) { factor = pGrModel->GetFactor(i); factor->UpdateStatisticsML(&m_Vector_pEvidences[GetNumberProcEv()], GetNumEv() - GetNumberProcEv()); } for( ev = 0; ev < GetNumEv() ; ev++) { pEv = m_Vector_pEvidences[ev]; for( i = 0; i < numberOfAddFactors; i++ ) { factor = static_cast<CFactor*>((*additionalCPDs)[i]); factor->UpdateStatisticsML( &pEv, 1 ); } } switch (pGrModel->GetModelType()) { case mtBNet: { for( i = 0; i<numberOfFactors; i++ ) { factor = pGrModel->GetFactor(i); ll = factor->ProcessingStatisticalData( GetNumEv()); m_vFamilyLogLik[i] = ll; loglik += ll; } for( i = 0; i < numberOfAddFactors; i++ ) { factor = static_cast<CFactor*>((*additionalCPDs)[i]); ll = factor->ProcessingStatisticalData( GetNumEv()); (*additionalLLs)[i] = ll; } break; } case mtMRF2: case mtMNet: { break; } default: { PNL_THROW(CBadConst, "model type" ) break; } } m_critValue.push_back(loglik); } else {
int testSetStatistics() { int ret = TRS_OK; float eps = 0.1f; int seed = pnlTestRandSeed(); pnlSeed( seed ); CBNet *pBNet = pnlExCreateCondGaussArBNet(); CModelDomain *pMD = pBNet->GetModelDomain(); CGraph *pGraph = CGraph::Copy(pBNet->GetGraph()); CBNet *pBNet1 = CBNet::CreateWithRandomMatrices( pGraph, pMD ); pEvidencesVector evidences; int nEvidences = pnlRand( 3000, 4000); pBNet->GenerateSamples( &evidences, nEvidences ); int i; for( i = 0; i < nEvidences; i++) { //evidences[i]->MakeNodeHiddenBySerialNum(0); } CEMLearningEngine *pLearn = CEMLearningEngine::Create(pBNet1); pLearn->SetData( nEvidences, &evidences.front() ); pLearn->SetMaxIterEM(); pLearn->Learn(); for( i = 0; i < pBNet->GetNumberOfFactors(); i++ ) { if( ! pBNet->GetFactor(i)->IsFactorsDistribFunEqual(pBNet1->GetFactor(i), eps)) { ret = TRS_FAIL; pBNet->GetFactor(i)->GetDistribFun()->Dump(); pBNet1->GetFactor(i)->GetDistribFun()->Dump(); } } CDistribFun *pDistr; const CMatrix<float>* pMat; CFactor *pCPD; pDistr = pBNet1->GetFactor(0)->GetDistribFun(); pMat = pDistr->GetStatisticalMatrix(stMatTable); pCPD = pBNet->GetFactor(0); pCPD->SetStatistics(pMat, stMatTable); pCPD->ProcessingStatisticalData(nEvidences); if( ! pCPD->IsFactorsDistribFunEqual(pBNet1->GetFactor(0), 0.0001f) ) { ret = TRS_FAIL; } pDistr = pBNet1->GetFactor(1)->GetDistribFun(); int parentVal; pCPD = pBNet->GetFactor(1); parentVal = 0; pCPD->SetStatistics(pMat, stMatCoeff); pMat = pDistr->GetStatisticalMatrix(stMatMu, &parentVal); pCPD->SetStatistics(pMat, stMatMu, &parentVal); pMat = pDistr->GetStatisticalMatrix(stMatSigma, &parentVal); pCPD->SetStatistics(pMat, stMatSigma, &parentVal); parentVal = 1; pMat = pDistr->GetStatisticalMatrix(stMatMu, &parentVal); pCPD->SetStatistics(pMat, stMatMu, &parentVal); pMat = pDistr->GetStatisticalMatrix(stMatSigma, &parentVal); pCPD->SetStatistics(pMat, stMatSigma, &parentVal); pCPD->ProcessingStatisticalData(nEvidences); if( ! pCPD->IsFactorsDistribFunEqual(pBNet1->GetFactor(1), eps) ) { ret = TRS_FAIL; } for( i = 0; i < nEvidences; i++) { delete evidences[i]; } delete pLearn; delete pBNet1; delete pBNet; return trsResult( ret, ret == TRS_OK ? "No errors" : "Bad test on SetStatistics"); }