void CMlLearningEngine::Learn()
{
    /* Takes the information from m_pEvidences and learns the factors of the
       graphical model, with or without prior probabilities. */
    float logLikTmp = 0;
    if( !m_pGrModel )
    {
        PNL_THROW( CNULLPointer, "no graphical model" )
    }
    CStaticGraphicalModel *grmodel = this->GetStaticModel();
    CFactor *factor = NULL;
    int numberOfDomains = grmodel->GetNumberOfFactors();

    for( int domainNodes = 0; domainNodes < numberOfDomains; domainNodes++ )
    {
        // accumulate sufficient statistics over all evidences, then
        // re-estimate the factor and collect its log-likelihood term
        factor = grmodel->GetFactor( domainNodes );
        factor->UpdateStatisticsML( &m_Vector_pEvidences.front(),
            m_Vector_pEvidences.size() );
        PNL_CHECK_LEFT_BORDER( m_numberOfAllEvidences, 1 );
        logLikTmp += factor->ProcessingStatisticalData( m_numberOfAllEvidences );
    }

    switch( grmodel->GetModelType() )
    {
    case mtBNet:
    {
        break;
    }
    case mtMRF2:
    case mtMNet:
    {
        logLikTmp = _LearnPotentials();
        break;
    }
    default:
    {
        PNL_THROW( CBadConst, "model type" )
        break;
    }
    }
    m_critValue.push_back( logLikTmp );
}
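/* The fully observed ML update above reduces to counting and normalizing:
   UpdateStatisticsML accumulates sufficient statistics over the evidences and
   ProcessingStatisticalData converts them into factor values, returning the
   log-likelihood contribution. The following is a minimal self-contained
   sketch of that idea for one discrete node with a single parent; it is
   illustrative only, and none of its names belong to the PNL API. */
#include <cstdio>
#include <utility>
#include <vector>

int main()
{
    const int nParentStates = 2, nChildStates = 2;
    // (parent, child) observations -- stand-ins for PNL's CEvidence objects
    std::vector< std::pair<int, int> > samples;
    samples.push_back(std::make_pair(0, 0));
    samples.push_back(std::make_pair(0, 1));
    samples.push_back(std::make_pair(1, 1));
    samples.push_back(std::make_pair(1, 1));

    float counts[2][2] = { { 0.0f, 0.0f }, { 0.0f, 0.0f } }; // sufficient statistics
    for (size_t i = 0; i < samples.size(); i++)
        counts[samples[i].first][samples[i].second] += 1.0f; // cf. UpdateStatisticsML

    for (int p = 0; p < nParentStates; p++)  // cf. ProcessingStatisticalData:
    {                                        // normalize each conditional row
        float rowSum = 0.0f;
        for (int c = 0; c < nChildStates; c++) rowSum += counts[p][c];
        for (int c = 0; c < nChildStates; c++)
            printf("P(child=%d | parent=%d) = %.2f\n", c, p, counts[p][c] / rowSum);
    }
    return 0;
}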
void CParEMLearningEngine::Learn()
{
    CStaticGraphicalModel *pGrModel = this->GetStaticModel();
    PNL_CHECK_IS_NULL_POINTER(pGrModel);
    PNL_CHECK_LEFT_BORDER(GetNumEv() - GetNumberProcEv(), 1);

    CJtreeInfEngine *pCurrentInfEng = NULL;
    CFactor *parameter = NULL;
    int exit = 0;
    int numberOfParameters = pGrModel->GetNumberOfParameters();
    int domainNodes;
    int infIsNeed = 0;
    int itsML = 0;
    float loglik = -FLT_MAX;
    float loglikOld = -FLT_MAX;
    float epsilon = GetPrecisionEM();
    float stopExpression = epsilon + 1.0f;
    int iteration = 0;
    int currentEvidNumber;
    int bMaximize = 0;
    int bSumOnMixtureNode = 0;
    const CEvidence *pCurrentEvid;

    int start_mpi, finish_mpi;
    int NumberOfProcesses, MyRank;
    int numSelfEvidences;
    MPI_Comm_size(MPI_COMM_WORLD, &NumberOfProcesses);
    MPI_Comm_rank(MPI_COMM_WORLD, &MyRank);

    do
    {
        iteration++;
        // split the unprocessed evidences evenly between the MPI processes;
        // the last process also takes the remainder
        numSelfEvidences = (GetNumEv() - GetNumberProcEv()) / NumberOfProcesses;
        start_mpi = GetNumberProcEv() + numSelfEvidences * MyRank;
        if (MyRank < NumberOfProcesses - 1)
            finish_mpi = start_mpi + numSelfEvidences;
        else
            finish_mpi = GetNumEv();

        for (int ev = start_mpi; ev < finish_mpi; ev++)
        {
            infIsNeed = 0;
            currentEvidNumber = ev;
            pCurrentEvid = m_Vector_pEvidences[currentEvidNumber];
            if (!pCurrentEvid)
            {
                PNL_THROW(CNULLPointer, "evidence")
            }
            // inference is needed only if this evidence leaves some nodes hidden
            infIsNeed = !GetObsFlags(ev)->empty();
            if (infIsNeed)
            {
                // create the inference engine lazily
                if (!pCurrentInfEng)
                {
                    pCurrentInfEng = CJtreeInfEngine::Create(pGrModel);
                }
                pCurrentInfEng->EnterEvidence(pCurrentEvid, bMaximize,
                    bSumOnMixtureNode);
            }
            for (domainNodes = 0; domainNodes < numberOfParameters; domainNodes++)
            {
                parameter = pGrModel->GetFactor(domainNodes);
                if (infIsNeed)
                {
                    int DomainSize;
                    const int *domain;
                    parameter->GetDomain(&DomainSize, &domain);
                    if (IsDomainObserved(DomainSize, domain, currentEvidNumber))
                    {
                        // the whole domain is observed: plain ML update
                        const CEvidence *pEvidences[] = { pCurrentEvid };
                        parameter->UpdateStatisticsML(pEvidences, 1);
                    }
                    else
                    {
                        // E-step: marginalize over the hidden nodes of the domain
                        pCurrentInfEng->MarginalNodes(domain, DomainSize, 1);
                        const CPotential *pMargPot = pCurrentInfEng->GetQueryJPD();
                        parameter->UpdateStatisticsEM(pMargPot, pCurrentEvid);
                    }
                }
                else
                {
                    const CEvidence *pEvidences[] = { pCurrentEvid };
                    parameter->UpdateStatisticsML(pEvidences, 1);
                }
            }
            itsML = itsML || !infIsNeed;
        }

        // sum the sufficient statistics accumulated by all processes
        for (domainNodes = 0; domainNodes < numberOfParameters; domainNodes++)
        {
            parameter = pGrModel->GetFactor(domainNodes);

            CNumericDenseMatrix<float> *matForSending;
            int matDim;
            const int *pMatRanges;
            int dataLength;
            const float *pDataForSending;
            matForSending = static_cast<CNumericDenseMatrix<float>*>(
                (parameter->GetDistribFun())->GetStatisticalMatrix(stMatTable));
            matForSending->GetRanges(&matDim, &pMatRanges);
            matForSending->GetRawData(&dataLength, &pDataForSending);

            float *pDataRecv = new float[dataLength];
            MPI_Allreduce((void*)pDataForSending, pDataRecv, dataLength,
                MPI_FLOAT, MPI_SUM, MPI_COMM_WORLD);

            // write the reduced statistics back into the factor's matrix
            CNumericDenseMatrix<float> *RecvMatrix =
                static_cast<CNumericDenseMatrix<float>*>(
                    parameter->GetDistribFun()->GetStatisticalMatrix(stMatTable));
            int dataLength_new;
            float *pData_new;
            RecvMatrix->GetRawData(&dataLength_new, (const float**)(&pData_new));
            for (int t = 0; t < dataLength_new; t++)
                pData_new[t] = pDataRecv[t];
            delete[] pDataRecv;
        }

        switch (pGrModel->GetModelType())
        {
        case mtBNet:
        {
            // M-step: re-estimate the factors and accumulate the log-likelihood
            loglikOld = loglik;
            loglik = 0.0f;
            for (domainNodes = 0; domainNodes < numberOfParameters; domainNodes++)
            {
                parameter = pGrModel->GetFactor(domainNodes);
                loglik += parameter->ProcessingStatisticalData(m_numberOfAllEvidences);
            }
            break;
        }
        case mtMRF2:
        case mtMNet:
        {
            loglikOld = loglik;
            loglik = _LearnPotentials();
            break;
        }
        default:
        {
            PNL_THROW(CBadConst, "model type")
            break;
        }
        }

        // relative change of the log-likelihood as the stopping criterion
        stopExpression =
            float(fabs(2 * (loglikOld - loglik) / (loglikOld + loglik)));
        exit = (stopExpression > epsilon) && (iteration <= GetMaxIterEM())
            && !itsML;
        if (exit)
        {
            ClearStatisticData();
        }
        delete pCurrentInfEng;
        pCurrentInfEng = NULL;
    } while (exit);

    if (iteration > GetMaxIterEM())
    {
        PNL_THROW(CNotConverged, "maximum number of iterations")
    }
    SetNumProcEv( GetNumEv() );
}
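/* The only inter-process communication in Learn() is the MPI_Allreduce over
   each factor's statistics table: every rank holds a partial sum, and the
   reduction leaves the element-wise total on all ranks, so each process can
   run the identical M-step locally. A minimal standalone sketch of that
   pattern follows; the buffer names are illustrative. */
#include <mpi.h>
#include <cstdio>
#include <vector>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    // partial sufficient statistics of this rank (dummy values here)
    std::vector<float> localStats(4, float(rank + 1));
    std::vector<float> globalStats(4, 0.0f);

    // element-wise sum across all ranks; every rank receives the result
    MPI_Allreduce(&localStats[0], &globalStats[0], (int)localStats.size(),
                  MPI_FLOAT, MPI_SUM, MPI_COMM_WORLD);

    if (rank == 0)
        printf("summed first element: %.1f\n", globalStats[0]);
    MPI_Finalize();
    return 0;
}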
void CParEMLearningEngine::LearnOMP()
{
    CStaticGraphicalModel *pGrModel = this->GetStaticModel();
    PNL_CHECK_IS_NULL_POINTER(pGrModel);
    PNL_CHECK_LEFT_BORDER(GetNumEv() - GetNumberProcEv(), 1);

    //omp_set_num_threads(2);
    int numberOfThreads = omp_get_num_procs();
    // one junction tree engine per thread, created lazily inside the loop
    CJtreeInfEngine **pCurrentInfEng = new CJtreeInfEngine*[numberOfThreads];
    for (int i = 0; i < numberOfThreads; i++)
        pCurrentInfEng[i] = NULL;

    CFactor *parameter1 = NULL;
    int exit = 0;
    int numberOfParameters = pGrModel->GetNumberOfParameters();
    int domainNodes;
    float loglik = -FLT_MAX;
    float loglikOld = -FLT_MAX;
    float epsilon = GetPrecisionEM();
    float stopExpression = epsilon + 1.0f;
    int iteration = 0;
    int ev;

    // each thread accumulates statistics in its own clone of every factor
    CFactor **ppAllFactors = new CFactor*[numberOfParameters * numberOfThreads];
    bool *was_updated = new bool[numberOfParameters * numberOfThreads];
    int factor;
#pragma omp parallel for private(factor) default(shared)
    for (factor = 0; factor < numberOfParameters; factor++)
    {
        ppAllFactors[factor] = pGrModel->GetFactor(factor);
        ppAllFactors[factor]->GetDistribFun()->ClearStatisticalData();
        was_updated[factor] = false;
        for (int proc = 1; proc < numberOfThreads; proc++)
        {
            ppAllFactors[factor + proc * numberOfParameters] =
                ppAllFactors[factor]->Clone();
            ppAllFactors[factor + proc * numberOfParameters]->GetDistribFun()->
                ClearStatisticalData();
            was_updated[factor + proc * numberOfParameters] = false;
        }
    }

    int *itsML = new int[numberOfThreads];
    for (int delta = 0; delta < numberOfThreads; delta++)
    {
        itsML[delta] = 0;
    }

    int start_ev, end_ev;
    do
    {
        iteration++;
        start_ev = GetNumberProcEv();
        end_ev = GetNumEv();
#pragma omp parallel for schedule(dynamic) private(ev)
        for (ev = start_ev; ev < end_ev; ev++)
        {
            CFactor *parameter = NULL;
            int DomainNodes_new;
            int bMaximize = 0;
            int bSumOnMixtureNode = 0;
            int infIsNeed = 0;
            int currentEvidNumber = ev;
            const CEvidence *pCurrentEvid =
                m_Vector_pEvidences[currentEvidNumber];
            // inference is needed only if this evidence leaves some nodes hidden
            infIsNeed = !GetObsFlags(ev)->empty();
            int Num_thread = omp_get_thread_num();
            if (infIsNeed)
            {
                if (!pCurrentInfEng[Num_thread])
                {
                    pCurrentInfEng[Num_thread] = CJtreeInfEngine::Create(
                        (const CStaticGraphicalModel *)pGrModel);
                }
                pCurrentInfEng[Num_thread]->EnterEvidence(pCurrentEvid,
                    bMaximize, bSumOnMixtureNode);
            }
            for (DomainNodes_new = 0; DomainNodes_new < numberOfParameters;
                DomainNodes_new++)
            {
                // update this thread's private clone of the factor
                parameter = ppAllFactors[DomainNodes_new +
                    Num_thread * numberOfParameters];
                if (infIsNeed)
                {
                    int DomainSize;
                    const int *domain;
                    parameter->GetDomain(&DomainSize, &domain);
                    if (IsDomainObserved(DomainSize, domain, currentEvidNumber))
                    {
                        const CEvidence *pEvidences[] = { pCurrentEvid };
                        parameter->UpdateStatisticsML(pEvidences, 1);
                        was_updated[DomainNodes_new +
                            Num_thread * numberOfParameters] = true;
                    }
                    else
                    {
                        // E-step: marginalize over the hidden nodes of the domain
                        pCurrentInfEng[Num_thread]->MarginalNodes(domain,
                            DomainSize, 1);
                        const CPotential *pMargPot =
                            pCurrentInfEng[Num_thread]->GetQueryJPD();
                        parameter->UpdateStatisticsEM(pMargPot, pCurrentEvid);
                        was_updated[DomainNodes_new +
                            Num_thread * numberOfParameters] = true;
                    }
                }
                else
                {
                    const CEvidence *pEvidences[] = { pCurrentEvid };
                    parameter->UpdateStatisticsML(pEvidences, 1);
                    was_updated[DomainNodes_new +
                        Num_thread * numberOfParameters] = true;
                }
            }
            itsML[Num_thread] = itsML[Num_thread] || !infIsNeed;
        } // end of parallel for

        for (int delta = 1; delta < numberOfThreads; delta++)
        {
            itsML[0] = itsML[0] || itsML[delta];
        }

        // merge the per-thread statistics into the model's own factors
#pragma omp parallel for private(factor) default(shared)
        for (factor = 0; factor < numberOfParameters; factor++)
        {
            for (int proc = 1; proc < numberOfThreads; proc++)
            {
                if (was_updated[factor + proc * numberOfParameters])
                {
                    ppAllFactors[factor]->UpdateStatisticsML(
                        ppAllFactors[factor + proc * numberOfParameters]);
                    ppAllFactors[factor + proc * numberOfParameters]->
                        GetDistribFun()->ClearStatisticalData();
                }
                was_updated[factor + proc * numberOfParameters] = false;
            }
        }

        switch (pGrModel->GetModelType())
        {
        case mtBNet:
        {
            // M-step: re-estimate the factors and accumulate the log-likelihood
            loglikOld = loglik;
            loglik = 0.0f;
            for (domainNodes = 0; domainNodes < numberOfParameters; domainNodes++)
            {
                parameter1 = pGrModel->GetFactor(domainNodes);
                loglik += parameter1->ProcessingStatisticalData(
                    m_numberOfAllEvidences);
            }
            break;
        }
        case mtMRF2:
        case mtMNet:
        {
            loglikOld = loglik;
            loglik = _LearnPotentials();
            break;
        }
        default:
        {
            PNL_THROW(CBadConst, "model type")
            break;
        }
        }

        // relative change of the log-likelihood as the stopping criterion
        stopExpression =
            float(fabs(2 * (loglikOld - loglik) / (loglikOld + loglik)));
        exit = (stopExpression > epsilon) && (iteration <= GetMaxIterEM())
            && !itsML[0];
        if (exit)
        {
            ClearStatisticData();
        }
        m_critValue.push_back(loglik);
        for (int j = 0; j < numberOfThreads; j++)
        {
            delete pCurrentInfEng[j];
            pCurrentInfEng[j] = NULL;
        }
    } while (exit);

    delete[] pCurrentInfEng;
    // delete the additional per-thread factor clones
    for (factor = numberOfParameters;
        factor < numberOfParameters * numberOfThreads; factor++)
    {
        delete ppAllFactors[factor];
    }
    delete[] ppAllFactors;
    delete[] was_updated;
    delete[] itsML;

    if (iteration > GetMaxIterEM())
    {
        PNL_THROW(CNotConverged, "maximum number of iterations")
    }
    SetNumProcEv( GetNumEv() );
}
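/* LearnOMP() avoids locking by giving every thread its own clone of each
   factor and summing the clones into slot 0 after the parallel region. The
   standalone sketch below (compile with -fopenmp; all names illustrative)
   shows that clone-and-merge pattern with a flat array of counters. */
#include <omp.h>
#include <cstdio>
#include <vector>

int main()
{
    const int nBins = 8, nSamples = 1000;
    int nThreads = omp_get_max_threads();
    // per-thread "factor clones": thread t owns stats[t*nBins .. t*nBins+nBins-1]
    std::vector<float> stats(nBins * nThreads, 0.0f);

#pragma omp parallel for
    for (int ev = 0; ev < nSamples; ev++)
    {
        int t = omp_get_thread_num();
        stats[(ev % nBins) + t * nBins] += 1.0f; // thread-private update, no lock
    }

    // merge after the parallel region, like UpdateStatisticsML on each clone
    for (int t = 1; t < nThreads; t++)
        for (int b = 0; b < nBins; b++)
            stats[b] += stats[b + t * nBins];

    printf("total in bin 0: %.0f\n", stats[0]);
    return 0;
}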