/**
 * Creates a new CPD object for the given family of nodes.
 *
 * The distribution type is first obtained from pnlDetermineDistributionType()
 * and then overridden with a SoftMax type when the last family entry (the
 * node checked here as the child) is discrete and at least one other family
 * member is continuous:
 *   - dtCondSoftMax if the family also contains a discrete parent,
 *   - dtSoftMax otherwise.
 * Both SoftMax types are built through CSoftMaxCPD::Create().
 *
 * @param nfamily   number of entries in @p family
 * @param family    node indices of the family; the last entry is the child
 * @param pGrModel  model whose model domain supplies the variable types
 * @return the newly created CPD (caller takes ownership)
 * @throws CNotImplemented (via PNL_THROW) for any other distribution type
 */
CCPD* CMlStaticStructLearn::CreateRandomCPD(int nfamily, const int* family, CGraphicalModel* pGrModel)
{
    CModelDomain* pMD = pGrModel->GetModelDomain();
    EDistributionType dt = pnlDetermineDistributionType( pMD, nfamily, family, NULL);
    CCPD* pCPD = NULL;

    // checking SoftMax distribution: discrete child with a continuous parent
    if( nfamily > 0 && pMD->GetVariableType(family[nfamily-1])->IsDiscrete() )
    {
        bool hasContinuousParent = false;
        bool hasDiscreteParent = false;
        for( int i = 0; i < nfamily-1; i++ )
        {
            if( pMD->GetVariableType(family[i])->IsDiscrete() )
            {
                hasDiscreteParent = true;
            }
            else
            {
                hasContinuousParent = true;
            }
        }
        if( hasContinuousParent )
        {
            // The previous code assigned dtCondSoftMax and then immediately
            // overwrote it with dtSoftMax; keep the conditional type here.
            dt = hasDiscreteParent ? dtCondSoftMax : dtSoftMax;
        }
    }
    // end of checking

    switch (dt)
    {
    case dtTabular :
        pCPD = CTabularCPD::Create(family, nfamily, pMD);
        pCPD->CreateAllNecessaryMatrices(1);
        break;

    case dtTree :
        // No matrices are created for tree CPDs here.
        pCPD = CTreeCPD::Create(family, nfamily, pMD);
        break;

    case dtGaussian :
    case dtCondGaussian :
        pCPD = CGaussianCPD::Create(family, nfamily, pMD);
        pCPD->CreateAllNecessaryMatrices(1);
        break;

    case dtSoftMax:
    case dtCondSoftMax:
        pCPD = CSoftMaxCPD::Create(family, nfamily, pMD);
        pCPD->CreateAllNecessaryMatrices(1);
        break;

    default:
        PNL_THROW(CNotImplemented, "This type of distribution has not been implemented yet");
        break;
    }
    return pCPD;
}
/**
 * Builds a new CPD for the given family, using the distribution type of the
 * factor that the network currently holds for the child node (the last entry
 * of @p family).
 *
 * Supported types: dtTabular, dtMixGaussian, dtGaussian and dtCondGaussian.
 * For dtMixGaussian the probabilities of the existing mixture factor are
 * passed on to the new CPD.
 *
 * @param nfamily  number of entries in @p family
 * @param family   node indices of the family; the last entry is the child
 * @param pBNet    network providing the model domain and the child's factor
 * @return the newly created CPD (caller takes ownership)
 * @throws CNotImplemented (via PNL_THROW) for any other distribution type
 */
CCPD* CStaticStructLearnSEM::CreateRandomCPD(int nfamily, const int* family, CBNet* pBNet)
{
    const int childNode = family[nfamily-1];
    CModelDomain* pDomain = pBNet->GetModelDomain();
    CFactor* pChildFactor = pBNet->GetFactor(childNode);

    switch( pChildFactor->GetDistributionType() )
    {
    case dtTabular:
        {
            CCPD* pTabular = CTabularCPD::Create(family, nfamily, pDomain);
            pTabular->CreateAllNecessaryMatrices(1);
            return pTabular;
        }

    case dtMixGaussian:
        {
            // Carry over the probabilities of the existing mixture factor.
            floatVector mixtureProbs;
            static_cast<CMixtureGaussianCPD*>(pChildFactor)->GetProbabilities(&mixtureProbs);

            CCPD* pMixture = CMixtureGaussianCPD::Create(family, nfamily, pDomain, &mixtureProbs.front());
            static_cast<CCondGaussianDistribFun*>(pMixture->GetDistribFun())->CreateDefaultMatrices(1);
            return pMixture;
        }

    case dtGaussian:
    case dtCondGaussian:
        {
            CCPD* pGaussian = CGaussianCPD::Create(family, nfamily, pDomain);
            pGaussian->CreateAllNecessaryMatrices(1);
            return pGaussian;
        }

    default:
        PNL_THROW(CNotImplemented, "this type of distribution is not supported yet");
    }
}
/**
 * Creates a CPD for the given family and fits its parameters.
 *
 * - Non-SoftMax CPD, score method other than MarLh: statistics are gathered
 *   from @p pEvidences via UpdateStatisticsML() and then processed.
 * - Non-SoftMax CPD, score method MarLh: pseudo counts are initialised with
 *   m_K2alfa, updated one case at a time from m_Vector_pEvidences, and then
 *   converted with PriorToCPD(). This path is only valid for tabular CPDs.
 * - SoftMax CPD: trained with the gradient maximizing method on an evidence
 *   matrix built from m_Vector_pEvidences.
 *
 * @param vFamily     node indices of the family (child last)
 * @param pGrModel    model the CPD is created for
 * @param pEvidences  evidence array used by the maximum-likelihood path; the
 *                    number of cases is taken from m_Vector_pEvidences
 * @return the fitted CPD (caller takes ownership)
 * @throws CNotImplemented (via PNL_THROW) when the MarLh path is requested
 *         for a CPD that is not tabular
 */
CCPD* CMlStaticStructLearn::ComputeFactor(intVector vFamily, CGraphicalModel* pGrModel, CEvidence** pEvidences)
{
    const int nFamily = vFamily.size();
    CCPD* iCPD = this->CreateRandomCPD(nFamily, &vFamily.front(), pGrModel);
    const int ncases = m_Vector_pEvidences.size();

    // NOTE(review): this tests the factor-level type only, whereas
    // ComputeFamilyScore tests the distribution function's type and also
    // accepts dtCondSoftMax - confirm both checks select the same CPDs.
    if( iCPD->GetDistributionType() != dtSoftMax )
    {
        if( m_ScoreMethod != MarLh )
        {
            iCPD->UpdateStatisticsML( pEvidences, ncases );
            iCPD->ProcessingStatisticalData(ncases);
        }
        else
        {
            // The Bayesian update below downcasts to CTabularDistribFun, so
            // reject anything else instead of invoking undefined behaviour
            // (same restriction as the MarLh case of ComputeFamilyScore).
            if( iCPD->GetDistributionType() != dtTabular )
            {
                delete iCPD;
                PNL_THROW(CNotImplemented, "This type of score method has been implemented only for discrete nets");
            }

            int DomainSize;
            const int* domain;
            iCPD->GetDomain(&DomainSize, &domain);

            CTabularDistribFun* pDistribFun = static_cast<CTabularDistribFun *>(iCPD->GetDistribFun());
            pDistribFun->InitPseudoCounts(m_K2alfa);
            for( int caseIdx = 0; caseIdx < ncases; caseIdx++ )
            {
                // One-element array; named so it no longer shadows the
                // pEvidences parameter.
                const CEvidence* singleCase[] = { m_Vector_pEvidences[caseIdx] };
                pDistribFun->BayesUpdateFactor(singleCase, 1, domain);
            }
            pDistribFun->PriorToCPD();
        }
    }
    else
    {
        float **evid = NULL;
        float **full_evid = NULL;
        BuildFullEvidenceMatrix(&full_evid);

        CSoftMaxCPD* SoftMaxFactor = static_cast<CSoftMaxCPD*>(iCPD);
        SoftMaxFactor->BuildCurrentEvidenceMatrix(&full_evid, &evid, vFamily, ncases);
        SoftMaxFactor->InitLearnData();
        SoftMaxFactor->SetMaximizingMethod(mmGradient);
        SoftMaxFactor->MaximumLikelihood(evid, ncases, 0.00001f, 0.01f);
        SoftMaxFactor->CopyLearnDataToDistrib();

        // Release the per-family evidence matrix (one row per domain node).
        const int domainSize = SoftMaxFactor->GetDomainSize();
        for( int k = 0; k < domainSize; k++ )
        {
            delete [] evid[k];
        }
        delete [] evid;

        // Release the full evidence matrix (one row per observed node of the
        // first evidence).
        intVector obsNodes;
        (m_Vector_pEvidences[0])->GetAllObsNodes(&obsNodes);
        const int nObsNodes = obsNodes.size();
        for( int row = 0; row < nObsNodes; row++ )
        {
            delete [] full_evid[row];
        }
        delete [] full_evid;
    }
    return iCPD;
}
/**
 * Computes the score of a family (child node with its parents) on the
 * evidences stored in m_Vector_pEvidences.
 *
 * A temporary CPD is created for the family, scored according to
 * m_ScoreMethod and destroyed before returning:
 *   - MaxLh: value returned by ProcessingStatisticalData() after a
 *            maximum-likelihood update, or CalculateLikelihood() of a
 *            gradient-trained SoftMax / conditional SoftMax CPD;
 *   - PreAs: value accumulated case by case (tabular and Gaussian CPDs only);
 *   - MarLh: CalculateBayesianScore() of a tabular CPD under the prior
 *            selected by m_priorType (Dirichlet, K2 or BDeu).
 * The penalty selected by m_ScoreType (BIC, AIC or none) is then subtracted.
 *
 * @param vFamily  node indices of the family (child last)
 * @return the penalised family score
 * @throws CNotImplemented (via PNL_THROW) for unsupported combinations of
 *         score method, prior, score type and distribution type
 */
float CMlStaticStructLearn::ComputeFamilyScore(intVector vFamily)
{
    const int nFamily = vFamily.size();
    CCPD* iCPD = this->CreateRandomCPD(nFamily, &vFamily.front(), m_pGrModel);
    const int ncases = m_Vector_pEvidences.size();
    float score = 0.0f;

    // The temporary CPD used to leak whenever one of the PNL_THROW paths
    // below fired; the try/catch releases it before the exception propagates.
    try
    {
        switch (m_ScoreMethod)
        {
        case MaxLh :
            {
                const EDistributionType funType = iCPD->GetDistribFun()->GetDistributionType();
                if( funType != dtSoftMax && funType != dtCondSoftMax )
                {
                    iCPD->UpdateStatisticsML( &m_Vector_pEvidences.front(), ncases );
                    score = iCPD->ProcessingStatisticalData(ncases);
                }
                else
                {
                    float **evid = NULL;
                    float **full_evid = NULL;
                    BuildFullEvidenceMatrix(&full_evid);

                    CSoftMaxCPD* SoftMaxFactor = static_cast<CSoftMaxCPD*>(iCPD);
                    SoftMaxFactor->BuildCurrentEvidenceMatrix(&full_evid, &evid, vFamily, ncases);
                    SoftMaxFactor->InitLearnData();
                    SoftMaxFactor->SetMaximizingMethod(mmGradient);
                    SoftMaxFactor->MaximumLikelihood(evid, ncases, 0.00001f, 0.01f);
                    SoftMaxFactor->CopyLearnDataToDistrib();

                    // Pick the distribution-function class matching the
                    // (re-queried) type of the trained distribution.
                    if( SoftMaxFactor->GetDistribFun()->GetDistributionType() == dtSoftMax )
                    {
                        score = static_cast<CSoftMaxDistribFun*>(SoftMaxFactor->GetDistribFun())
                            ->CalculateLikelihood(evid, ncases);
                    }
                    else
                    {
                        score = static_cast<CCondSoftMaxDistribFun*>(SoftMaxFactor->GetDistribFun())
                            ->CalculateLikelihood(evid, ncases);
                    }

                    // Release the per-family evidence matrix.
                    const int domainSize = SoftMaxFactor->GetDomainSize();
                    for( int k = 0; k < domainSize; k++ )
                    {
                        delete [] evid[k];
                    }
                    delete [] evid;

                    // Release the full evidence matrix (one row per observed
                    // node of the first evidence).
                    intVector obsNodes;
                    (m_Vector_pEvidences[0])->GetAllObsNodes(&obsNodes);
                    const int nObsNodes = obsNodes.size();
                    for( int row = 0; row < nObsNodes; row++ )
                    {
                        delete [] full_evid[row];
                    }
                    delete [] full_evid;
                }
                break;
            }

        case PreAs :
            {
                float pred = 0;
                switch (iCPD->GetDistributionType())
                {
                case dtTabular :
                    for( int i = 0; i < ncases; i++ )
                    {
                        // Feed a single case, then accumulate the log of the
                        // matrix value the updated CPD holds for that case.
                        pConstEvidenceVector tempEv(0);
                        tempEv.push_back(m_Vector_pEvidences[i]);
                        iCPD->UpdateStatisticsML(&tempEv.front(), tempEv.size());
                        iCPD->ProcessingStatisticalData(tempEv.size());
                        pred += log(static_cast<CTabularCPD*>(iCPD)->GetMatrixValue(m_Vector_pEvidences[i]));
                    }
                    break;

                case dtGaussian :
                    for( int i = 0; i < ncases; i++ )
                    {
                        pConstEvidenceVector tempEv(0);
                        tempEv.push_back(m_Vector_pEvidences[i]);
                        iCPD->UpdateStatisticsML(&tempEv.front(), tempEv.size());
                        // The first case only contributes statistics; it is
                        // not scored.
                        if( i != 0 )
                        {
                            pred += iCPD->ProcessingStatisticalData(1);
                        }
                    }
                    break;

                case dtSoftMax:
                default:
                    PNL_THROW(CNotImplemented, "This type score method has not been implemented yet");
                    break;
                }
                score = pred;
                break;
            }

        case MarLh :
            {
                // Check that the potential is discrete.
                if( iCPD->GetDistributionType() != dtTabular )
                {
                    PNL_THROW(CNotImplemented, "This type of score method has been implemented only for discrete nets");
                }

                int DomainSize;
                const int* domain;
                iCPD->GetDomain(&DomainSize, &domain);
                CTabularDistribFun* pDistribFun = static_cast<CTabularDistribFun *>(iCPD->GetDistribFun());

                // The priors differ only in how the pseudo counts start out
                // (and, for BDeu, in the final normalisation below).
                switch (m_priorType)
                {
                case Dirichlet:
                case BDeu:
                    pDistribFun->InitPseudoCounts();
                    break;
                case K2:
                    pDistribFun->InitPseudoCounts(m_K2alfa);
                    break;
                default:
                    PNL_THROW(CNotImplemented, "This type of prior has not been implemented yet");
                    break;
                }

                for( int i = 0; i < ncases; i++ )
                {
                    const CEvidence* singleCase[] = { m_Vector_pEvidences[i] };
                    pDistribFun->BayesUpdateFactor(singleCase, 1, domain);
                }

                if( m_priorType == BDeu )
                {
                    score = pDistribFun->CalculateBayesianScore() / iCPD->GetNumberOfFreeParameters();
                }
                else
                {
                    score = pDistribFun->CalculateBayesianScore();
                }
                break;
            }

        default :
            PNL_THROW(CNotImplemented, "This type score method has not been implemented yet");
            break;
        }

        // Complexity penalty based on the number of free parameters.
        const int dim = iCPD->GetNumberOfFreeParameters();
        switch (m_ScoreType)
        {
        case BIC :
            score -= 0.5f * float(dim) * float(log(float(ncases)));
            break;
        case AIC :
            score -= 0.5f * float(dim);
            break;
        case WithoutFine:
            break;
        case VAR :
        default:
            PNL_THROW(CNotImplemented, "This type score function has not been implemented yet");
            break;
        }
    }
    catch (...)
    {
        delete iCPD;
        throw;
    }

    delete iCPD;
    return score;
}