void CBayesLearningEngine::Learn() { if(!m_pGrModel) { PNL_THROW( CNULLPointer, "no graphical model") } CStaticGraphicalModel *grmodel = this->GetStaticModel(); CFactor *factor = NULL; int numberOfFactors = grmodel->GetNumberOfFactors(); int domainNodes; if(m_numberOfLearnedEvidences == m_numberOfAllEvidences) { PNL_THROW(COutOfRange, "number of unlearned evidences must be positive") } int currentEvidNumber; const CEvidence* pCurrentEvid; //below code is intended to work on tabular CPD and gaussian CPD //later we will generalize it for other distribution types if ((grmodel->GetFactor(0))->GetDistributionType() == dtTabular) { for( int ev = m_numberOfLearnedEvidences; ev < m_numberOfAllEvidences; ev++) { currentEvidNumber = ev; pCurrentEvid = m_Vector_pEvidences[currentEvidNumber]; if( !pCurrentEvid) { PNL_THROW(CNULLPointer, "evidence") } for( domainNodes = 0; domainNodes < numberOfFactors; domainNodes++ ) { factor = grmodel->GetFactor( domainNodes ); int DomainSize; const int *domain; factor->GetDomain( &DomainSize, &domain ); const CEvidence *pEvidences[] = { pCurrentEvid }; CTabularDistribFun* pDistribFun = (CTabularDistribFun*)(factor->GetDistribFun()); pDistribFun->BayesUpdateFactor(pEvidences, 1, domain); } } } else { for( domainNodes = 0; domainNodes < numberOfFactors; domainNodes++ )
/**
 * Creates a CPD for the node family vFamily in pGrModel and trains it on the
 * evidence held by this learning object.
 *
 * Training strategy, chosen from the freshly created CPD and m_ScoreMethod:
 *  - dtSoftMax CPD: builds the full evidence matrix, extracts the part that
 *    belongs to the family and runs gradient maximum likelihood on it;
 *  - m_ScoreMethod == MarLh: initialises pseudo counts with m_K2alfa, feeds
 *    the stored evidence one case at a time through BayesUpdateFactor and
 *    finally calls PriorToCPD;
 *  - otherwise: UpdateStatisticsML on pEvidences followed by
 *    ProcessingStatisticalData.
 *
 * @param vFamily     node numbers of the family the CPD is built over.
 * @param pGrModel    graphical model passed on to CreateRandomCPD.
 * @param pEvidences  evidence array handed to UpdateStatisticsML; the number
 *                    of cases is taken from m_Vector_pEvidences.
 * @return the trained CPD; this function does not delete it.
 */
CCPD* CMlStaticStructLearn::ComputeFactor(intVector vFamily, CGraphicalModel* pGrModel, CEvidence** pEvidences)
{
    const int familySize = vFamily.size();
    CCPD* const pCPD = this->CreateRandomCPD(familySize, &vFamily.front(), pGrModel);
    const int caseCount = m_Vector_pEvidences.size();

    if (pCPD->GetDistributionType() == dtSoftMax)
    {
        float** familyMatrix = NULL;
        float** fullMatrix = NULL;
        BuildFullEvidenceMatrix(&fullMatrix);

        CSoftMaxCPD* const pSoftMax = static_cast<CSoftMaxCPD*>(pCPD);
        pSoftMax->BuildCurrentEvidenceMatrix(&fullMatrix, &familyMatrix, vFamily,
            m_Vector_pEvidences.size());
        pSoftMax->InitLearnData();
        pSoftMax->SetMaximizingMethod(mmGradient);
        pSoftMax->MaximumLikelihood(familyMatrix, m_Vector_pEvidences.size(),
            0.00001f, 0.01f);
        pSoftMax->CopyLearnDataToDistrib();

        // Release the per-family matrix: one row per node of the CPD domain.
        for (int row = 0; row < pSoftMax->GetDomainSize(); row++)
        {
            delete [] familyMatrix[row];
        }
        delete [] familyMatrix;

        // Release the full matrix: one row per node observed in the first evidence.
        intVector observed;
        (m_Vector_pEvidences[0])->GetAllObsNodes(&observed);
        for (int row = 0; row < observed.size(); row++)
        {
            delete [] fullMatrix[row];
        }
        delete [] fullMatrix;

        return pCPD;
    }

    if (m_ScoreMethod == MarLh)
    {
        int domainLength;
        const int* pDomain;
        pCPD->GetDomain(&domainLength, &pDomain);

        CTabularDistribFun* const pTabular =
            static_cast<CTabularDistribFun *>(pCPD->GetDistribFun());
        pTabular->InitPseudoCounts(m_K2alfa);

        // The update is invoked with exactly one evidence per call.
        for (int caseIdx = 0; caseIdx < caseCount; caseIdx++)
        {
            const CEvidence* singleCase[] = { m_Vector_pEvidences[caseIdx] };
            pTabular->BayesUpdateFactor(singleCase, 1, pDomain);
        }
        pTabular->PriorToCPD();

        return pCPD;
    }

    pCPD->UpdateStatisticsML(pEvidences, caseCount);
    pCPD->ProcessingStatisticalData(caseCount);
    return pCPD;
}
/**
 * Scores the node family vFamily against the evidence stored in
 * m_Vector_pEvidences.
 *
 * A temporary CPD is created over the family with CreateRandomCPD, trained
 * according to m_ScoreMethod, and destroyed before the function exits.
 *
 * Raw score, by m_ScoreMethod:
 *  - MaxLh: for soft-max / conditional soft-max distributions the CPD is
 *    fitted by gradient maximum likelihood and the score is the value of
 *    CalculateLikelihood on the family evidence matrix; for every other
 *    distribution the score is the value returned by
 *    ProcessingStatisticalData after UpdateStatisticsML on all evidence.
 *  - PreAs: evidence is fed one case at a time. Tabular CPDs accumulate
 *    log(GetMatrixValue(evidence)); Gaussian CPDs accumulate the value of
 *    ProcessingStatisticalData(1) for every case except the first.
 *  - MarLh: tabular CPDs only. Pseudo counts are initialised according to
 *    m_priorType (K2 uses m_K2alfa), every evidence is passed through
 *    BayesUpdateFactor and the score is CalculateBayesianScore(); for BDeu
 *    that value is divided by the number of free parameters.
 *
 * Penalty, by m_ScoreType (dim = number of free parameters of the CPD):
 *  - BIC: score -= 0.5 * dim * log(number of cases)
 *  - AIC: score -= 0.5 * dim
 *  - WithoutFine: no penalty.
 *
 * @param vFamily  node numbers of the family to score.
 * @return the penalised score.
 * @throws CNotImplemented for an unsupported score method, node type, prior
 *         type or score type (VAR included), and for MarLh on a non-tabular CPD.
 *
 * NOTE(review): the code reads m_Vector_pEvidences.front() / [0] and takes
 * log(ncases), so it appears to assume at least one evidence - confirm
 * against callers.
 */
float CMlStaticStructLearn::ComputeFamilyScore(intVector vFamily)
{
    // Owns the temporary CPD so that it is released on every exit path.
    // Previously the CPD was deleted only just before the final return, so
    // each PNL_THROW below leaked it.
    struct CPDGuard
    {
        CCPD* m_pCPD;
        explicit CPDGuard(CCPD* pCPD) : m_pCPD(pCPD) {}
        ~CPDGuard() { delete m_pCPD; }
    };

    int nFamily = vFamily.size();
    CCPD* iCPD = this->CreateRandomCPD(nFamily, &vFamily.front(), m_pGrModel);
    CPDGuard cpdGuard(iCPD);

    int ncases = m_Vector_pEvidences.size();
    float score = 0;
    float pred = 0;

    switch (m_ScoreMethod)
    {
    case MaxLh :
    {
        const bool isSoftMax =
            (iCPD->GetDistribFun()->GetDistributionType() == dtSoftMax)
            || (iCPD->GetDistribFun()->GetDistributionType() == dtCondSoftMax);

        if (!isSoftMax)
        {
            iCPD->UpdateStatisticsML( &m_Vector_pEvidences.front(), ncases );
            score = iCPD->ProcessingStatisticalData(ncases);
        }
        else
        {
            float **evid = NULL;
            float **full_evid = NULL;
            BuildFullEvidenceMatrix(&full_evid);

            CSoftMaxCPD* SoftMaxFactor = static_cast<CSoftMaxCPD*>(iCPD);
            SoftMaxFactor->BuildCurrentEvidenceMatrix(&full_evid, &evid,
                vFamily, m_Vector_pEvidences.size());
            SoftMaxFactor->InitLearnData();
            SoftMaxFactor->SetMaximizingMethod(mmGradient);
            SoftMaxFactor->MaximumLikelihood(evid, m_Vector_pEvidences.size(),
                0.00001f, 0.01f);
            SoftMaxFactor->CopyLearnDataToDistrib();

            if (SoftMaxFactor->GetDistribFun()->GetDistributionType() == dtSoftMax)
            {
                score = static_cast<CSoftMaxDistribFun*>(
                    SoftMaxFactor->GetDistribFun())->CalculateLikelihood(evid, ncases);
            }
            else
            {
                score = static_cast<CCondSoftMaxDistribFun*>(
                    SoftMaxFactor->GetDistribFun())->CalculateLikelihood(evid, ncases);
            }

            // Family evidence matrix: one row per node of the CPD domain.
            for (int k = 0; k < SoftMaxFactor->GetDomainSize(); k++)
            {
                delete [] evid[k];
            }
            delete [] evid;

            // Full evidence matrix: one row per node observed in the first evidence.
            intVector obsNodes;
            (m_Vector_pEvidences[0])->GetAllObsNodes(&obsNodes);
            for (int i = 0; i < obsNodes.size(); i++)
            {
                delete [] full_evid[i];
            }
            delete [] full_evid;
        }
        break;
    }

    case PreAs :
    {
        EDistributionType NodeType = iCPD->GetDistributionType();
        switch (NodeType)
        {
        case dtTabular :
            for (int i = 0; i < ncases; i++)
            {
                // Update the CPD with this single case, then read back the
                // matrix value for the same evidence.
                pConstEvidenceVector tempEv(0);
                tempEv.push_back(m_Vector_pEvidences[i]);
                iCPD->UpdateStatisticsML(&tempEv.front(), tempEv.size());
                iCPD->ProcessingStatisticalData(tempEv.size());
                pred += log(static_cast<CTabularCPD*>(iCPD)->GetMatrixValue(
                    m_Vector_pEvidences[i]));
            }
            break;

        case dtGaussian :
            for (int i = 0; i < ncases; i += 1)
            {
                pConstEvidenceVector tempEv(0);
                tempEv.push_back(m_Vector_pEvidences[i]);
                iCPD->UpdateStatisticsML(&tempEv.front(), tempEv.size());
                // The first case only updates the statistics; it adds nothing
                // to the accumulated score.
                if (i != 0)
                {
                    float tmp = iCPD->ProcessingStatisticalData(1);
                    pred += tmp;
                }
            }
            break;

        case dtSoftMax :
        default :
            PNL_THROW(CNotImplemented,
                "This type score method has not been implemented yet");
            break;
        }
        score = pred;
        break;
    }

    case MarLh :
    {
        // Check that the potential is discrete.
        if (iCPD->GetDistributionType() != dtTabular)
        {
            PNL_THROW(CNotImplemented,
                "This type of score method has been implemented only for discrete nets");
        }

        int DomainSize;
        const int * domain;
        iCPD->GetDomain(&DomainSize, &domain);
        CTabularDistribFun *pDistribFun =
            static_cast<CTabularDistribFun *>(iCPD->GetDistribFun());

        // The priors differ only in how the pseudo counts are initialised
        // and, for BDeu, in the final normalisation.
        switch (m_priorType)
        {
        case Dirichlet :
        case BDeu :
            pDistribFun->InitPseudoCounts();
            break;
        case K2 :
            pDistribFun->InitPseudoCounts(m_K2alfa);
            break;
        default :
            PNL_THROW(CNotImplemented,
                "This type of prior has not been implemented yet");
            break;
        }

        // The update is invoked with exactly one evidence per call.
        for (int i = 0; i < ncases; i++)
        {
            const CEvidence *pEvidences[] = { m_Vector_pEvidences[i] };
            pDistribFun->BayesUpdateFactor(pEvidences, 1, domain);
        }

        if (m_priorType == BDeu)
        {
            score = pDistribFun->CalculateBayesianScore()
                / iCPD->GetNumberOfFreeParameters();
        }
        else
        {
            score = pDistribFun->CalculateBayesianScore();
        }
        break;
    }

    default :
        PNL_THROW(CNotImplemented,
            "This type score method has not been implemented yet");
        break;
    }

    int dim = iCPD->GetNumberOfFreeParameters();
    switch (m_ScoreType)
    {
    case BIC :
        score -= 0.5f * float(dim) * float(log(float(ncases)));
        break;
    case AIC :
        score -= 0.5f * float(dim);
        break;
    case WithoutFine :
        break;
    case VAR :
    default :
        PNL_THROW(CNotImplemented,
            "This type score function has not been implemented yet");
        break;
    }

    // iCPD is released by cpdGuard.
    return score;
}