CCPD* CStaticStructLearnSEM::CreateRandomCPD(int nfamily, const int* family, CBNet* pBNet)
{
    int child = family[nfamily - 1];
    CModelDomain* pMD = pBNet->GetModelDomain();
    CFactor* factor = pBNet->GetFactor(child);
    EDistributionType dt = factor->GetDistributionType();
    CCPD* pCPD;

    if( dt == dtTabular )
    {
        pCPD = CTabularCPD::Create(family, nfamily, pMD);
        pCPD->CreateAllNecessaryMatrices(1);
        return pCPD;
    }
    else if( dt == dtMixGaussian )
    {
        floatVector data;
        static_cast<CMixtureGaussianCPD*>(factor)->GetProbabilities(&data);
        pCPD = CMixtureGaussianCPD::Create(family, nfamily, pMD, &data.front());
        static_cast<CCondGaussianDistribFun*>(pCPD->GetDistribFun())->CreateDefaultMatrices(1);
        return pCPD;
    }
    else if( (dt == dtGaussian) || (dt == dtCondGaussian) )
    {
        pCPD = CGaussianCPD::Create(family, nfamily, pMD);
        pCPD->CreateAllNecessaryMatrices(1);
        return pCPD;
    }
    else
    {
        PNL_THROW(CNotImplemented, "this type of distribution is not supported yet");
    }
}
int CBICLearningEngine::DimOfModel(const CStaticGraphicalModel *pModel)
{
    // Compute the dimension d of the model (its number of independent parameters),
    // used in the BIC criterion: BIC = LogLik - 0.5*d*log(N)
    int nParam = pModel->GetNumberOfFactors();
    CFactor *param = NULL;
    int dimOfModel = 0;
    int dim = 1;
    CMatrix<float> *matrix;

    for (int i = 0; i < nParam; i++)
    {
        dim = 1;
        param = pModel->GetFactor(i);
        switch (param->GetFactorType())
        {
        case ftCPD:
        {
            switch (param->GetDistributionType())
            {
            case dtTabular:
            {
                matrix = param->GetMatrix(matTable);
                int size;
                const int *ranges;
                static_cast<CNumericDenseMatrix<float>*>(matrix)->GetRanges(&size, &ranges);
                // Independent parameters of a tabular CPD: product of the parent
                // cardinalities times (child cardinality - 1); the child is the
                // last node of the domain.
                for (int j = 0; j < size - 1; j++)
                {
                    dim *= ranges[j];
                }
                dim *= ranges[size - 1] - 1;
                break;
            } // case dtTabular
            case dtGaussian:
            {
                PNL_THROW(CNotImplemented, "Gaussian")
                break;
            } // case dtGaussian
            case dtCondGaussian:
            {
                PNL_THROW(CNotImplemented, "CondGaussian")
                break;
            } // case dtCondGaussian
            default:
            {
                PNL_THROW(CBadConst, "distribution type")
                break;
            }
            } // switch (param->GetDistributionType())
            break;
        } // case ftCPD
        case ftPotential:
        {
            PNL_THROW(CNotImplemented, "Factor")
            break;
        }
        default:
        {
            PNL_THROW(CBadConst, "FactorType")
            break;
        }
        } // switch (param->GetFactorType())
        dimOfModel += dim;
    } // for (i)
    return dimOfModel;
}
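For illustration, the tabular branch above counts the free parameters of one CPD as the product of the parent cardinalities times (child cardinality - 1). Below is a minimal standalone sketch of that arithmetic with hypothetical cardinalities; it is not part of the library, just a worked check of the loop above.

#include <cassert>

// Hypothetical example: a discrete child with 3 states and two parents with
// 2 and 4 states. The table has 2*4*3 entries, but each parent configuration
// constrains the child's probabilities to sum to one, leaving
// 2 * 4 * (3 - 1) = 16 independent parameters -- the value DimOfModel adds
// to the model dimension d used in BIC = LogLik - 0.5*d*log(N).
int main()
{
    const int ranges[] = { 2, 4, 3 };  // parents first, child last in the domain
    const int size = 3;
    int dim = 1;
    for( int j = 0; j < size - 1; j++ )
    {
        dim *= ranges[j];
    }
    dim *= ranges[size - 1] - 1;
    assert( dim == 16 );
    return 0;
}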
void CEMLearningEngine::Learn()
{
    CStaticGraphicalModel *pGrModel = this->GetStaticModel();
    PNL_CHECK_IS_NULL_POINTER(pGrModel);
    PNL_CHECK_LEFT_BORDER(GetNumEv() - GetNumberProcEv(), 1);

    CInfEngine *pInfEng = NULL;
    if (m_pInfEngine)
    {
        pInfEng = m_pInfEngine;
    }
    else
    {
        if (!m_bAllObserved)
        {
            pInfEng = CJtreeInfEngine::Create(pGrModel);
            m_pInfEngine = pInfEng;
        }
    }

    float loglik = 0.0f;
    int nFactors = pGrModel->GetNumberOfFactors();
    const CEvidence *pEv;
    CFactor *pFactor;

    int iteration = 0;
    int ev;
    int i;

    // Check whether any factor is a softmax CPD; those are learned separately
    // and require the full evidence matrix.
    bool IsCastNeed = false;
    for( i = 0; i < nFactors; i++ )
    {
        pFactor = pGrModel->GetFactor(i);
        EDistributionType dt = pFactor->GetDistributionType();
        if( dt == dtSoftMax )
        {
            IsCastNeed = true;
        }
    }

    float **full_evid = NULL;
    if (IsCastNeed)
    {
        BuildFullEvidenceMatrix(&full_evid);
    }

    if (IsAllObserved())
    {
        // Fully observed data: a single maximum-likelihood update, no inference needed.
        float **evid = NULL;
        EDistributionType dt;
        CFactor *factor = NULL;
        for (i = 0; i < nFactors; i++)
        {
            factor = pGrModel->GetFactor(i);
            dt = factor->GetDistributionType();
            if (dt != dtSoftMax)
            {
                factor->UpdateStatisticsML(&m_Vector_pEvidences[GetNumberProcEv()],
                    GetNumEv() - GetNumberProcEv());
            }
            else
            {
                intVector family;
                family.resize(0);
                pGrModel->GetGraph()->GetParents(i, &family);
                family.push_back(i);
                CSoftMaxCPD *SoftMaxFactor = static_cast<CSoftMaxCPD*>(factor);
                SoftMaxFactor->BuildCurrentEvidenceMatrix(&full_evid, &evid,
                    family, m_Vector_pEvidences.size());
                SoftMaxFactor->InitLearnData();
                SoftMaxFactor->SetMaximizingMethod(m_MaximizingMethod);
                SoftMaxFactor->MaximumLikelihood(evid, m_Vector_pEvidences.size(),
                    0.00001f, 0.01f);
                SoftMaxFactor->CopyLearnDataToDistrib();
                for (int k = 0; k < factor->GetDomainSize(); k++)
                {
                    delete [] evid[k];
                }
                delete [] evid;
            }
        }
        m_critValue.push_back(UpdateModel());
    }
    else
    {
        // Partially observed data: iterate the E-step (inference) and M-step
        // (parameter update) until convergence or the iteration limit.
        bool bContinue;
        const CPotential *pot;

        do
        {
            ClearStatisticData();
            iteration++;

            // E-step: accumulate sufficient statistics over all evidences.
            for( ev = GetNumberProcEv(); ev < GetNumEv(); ev++ )
            {
                bool bInfIsNeed = !GetObsFlags(ev)->empty();
                pEv = m_Vector_pEvidences[ev];
                if( bInfIsNeed )
                {
                    pInfEng->EnterEvidence(pEv, 0, 0);
                }
                for( i = 0; i < nFactors; i++ )
                {
                    pFactor = pGrModel->GetFactor(i);
                    int nnodes;
                    const int *domain;
                    pFactor->GetDomain( &nnodes, &domain );
                    if( bInfIsNeed && !IsDomainObserved(nnodes, domain, ev) )
                    {
                        pInfEng->MarginalNodes( domain, nnodes, 1 );
                        pot = pInfEng->GetQueryJPD();
                        if( !(m_Vector_pEvidences[ev])->IsNodeObserved(i) && IsCastNeed )
                        {
                            Cast(pot, i, ev, &full_evid);
                        }
                        EDistributionType dt = pFactor->GetDistributionType();
                        if( dt != dtSoftMax )
                        {
                            pFactor->UpdateStatisticsEM( pot, pEv );
                        }
                    }
                    else
                    {
                        if( pFactor->GetDistributionType() != dtSoftMax )
                        {
                            pFactor->UpdateStatisticsML( &pEv, 1 );
                        }
                    }
                }
            }

            // M-step for softmax CPDs: maximize the likelihood on the completed
            // evidence matrix.
            float **evid = NULL;
            EDistributionType dt;
            CFactor *factor = NULL;
            for (i = 0; i < nFactors; i++)
            {
                factor = pGrModel->GetFactor(i);
                dt = factor->GetDistributionType();
                if (dt == dtSoftMax)
                {
                    intVector family;
                    family.resize(0);
                    pGrModel->GetGraph()->GetParents(i, &family);
                    family.push_back(i);
                    CSoftMaxCPD *SoftMaxFactor = static_cast<CSoftMaxCPD*>(factor);
                    SoftMaxFactor->BuildCurrentEvidenceMatrix(&full_evid, &evid,
                        family, m_Vector_pEvidences.size());
                    SoftMaxFactor->InitLearnData();
                    SoftMaxFactor->SetMaximizingMethod(m_MaximizingMethod);
                    SoftMaxFactor->MaximumLikelihood(evid, m_Vector_pEvidences.size(),
                        0.00001f, 0.01f);
                    SoftMaxFactor->CopyLearnDataToDistrib();
                    for (int k = 0; k < factor->GetDomainSize(); k++)
                    {
                        delete [] evid[k];
                    }
                    delete [] evid;
                }
            }

            // Update the model from the collected statistics and test convergence
            // on the relative change of the log-likelihood.
            loglik = UpdateModel();
            if( GetMaxIterEM() != 1 )
            {
                bool flag = iteration == 1 ? true :
                    (fabs(2*(m_critValue.back() - loglik)/(m_critValue.back() + loglik)) >
                    GetPrecisionEM());
                bContinue = GetMaxIterEM() > iteration && flag;
            }
            else
            {
                bContinue = false;
            }
            m_critValue.push_back(loglik);
        } while (bContinue);
    }

    SetNumProcEv( GetNumEv() );

    if (IsCastNeed)
    {
        int NumOfNodes = pGrModel->GetGraph()->GetNumberOfNodes();
        for (i = 0; i < NumOfNodes; i++)
        {
            delete [] full_evid[i];
        }
        delete [] full_evid;
    }
}
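The do-while loop above stops when the symmetric relative change of the log-likelihood drops below the EM precision or the iteration limit is reached. Below is a small standalone sketch of that test with hypothetical numbers; it is not PNL code, only a restatement of the convergence check.

#include <cmath>
#include <cstdio>

// Hypothetical log-likelihood values; the test mirrors the one in
// CEMLearningEngine::Learn(): continue while
// |2*(prevLoglik - loglik) / (prevLoglik + loglik)| > precision
// and the iteration count is still below the maximum.
int main()
{
    float prevLoglik = -1052.4f;   // previous value, m_critValue.back()
    float loglik     = -1050.9f;   // value returned by the current update
    float precision  = 0.001f;     // GetPrecisionEM()
    int   iteration  = 3;
    int   maxIterEM  = 30;         // GetMaxIterEM()

    bool flag = fabs( 2*(prevLoglik - loglik)/(prevLoglik + loglik) ) > precision;
    bool bContinue = ( maxIterEM > iteration ) && flag;
    printf( "relative change = %g, continue EM: %d\n",
            fabs( 2*(prevLoglik - loglik)/(prevLoglik + loglik) ), bContinue );
    return 0;
}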
CBNet *C1_5SliceInfEngine::Create1_5SliceBNet()
{
    CGraph *p1_5SliceGraph = Create1_5SliceGraph();
    PNL_CHECK_IF_MEMORY_ALLOCATED(p1_5SliceGraph);

    intVecVector comp;
    p1_5SliceGraph->GetConnectivityComponents(&comp);
    PNL_CHECK_FOR_NON_ZERO( comp.size() - 1 );

    nodeTypeVector nodeTypes;
    int nnodes = p1_5SliceGraph->GetNumberOfNodes();
    GrModel()->GetModelDomain()->GetVariableTypes(&nodeTypes);

    const int *nodeAssociations = GrModel()->GetNodeAssociations();
    intVector FinalNodeAssociations;
    FinalNodeAssociations.resize(nnodes);

    int numberOfInterfaceNodes;
    const int *interfaceNodes;
    GrModel()->GetInterfaceNodes(&numberOfInterfaceNodes, &interfaceNodes);

    int nnodesPerSlice = GrModel()->GetNumberOfNodes();
    int node;
    for( node = 0; node < numberOfInterfaceNodes; node++ )
    {
        FinalNodeAssociations[node] = nodeAssociations[interfaceNodes[node]];
    }
    for( node = numberOfInterfaceNodes; node < nnodes; node++ )
    {
        FinalNodeAssociations[node] =
            nodeAssociations[nnodesPerSlice - numberOfInterfaceNodes + node];
    }

    CBNet *p1_5SliceGrModel = CBNet::Create( nnodes, nodeTypes.size(),
        &nodeTypes.front(), &FinalNodeAssociations.front(), p1_5SliceGraph );
    p1_5SliceGrModel->AllocFactors();

    CFactor *pFactor;
    intVector domain(1);
    CFactor *pUnitFactor;

    // The interface nodes (copies carried over from the previous slice) get
    // unit-function CPDs.
    for( node = 0; node < numberOfInterfaceNodes; node++ )
    {
        domain[0] = node;
        if( GrModel()->GetNodeType(interfaceNodes[node])->IsDiscrete() )
        {
            pUnitFactor = CTabularCPD::CreateUnitFunctionCPD( domain,
                p1_5SliceGrModel->GetModelDomain() );
        }
        else
        {
            pUnitFactor = CGaussianCPD::CreateUnitFunctionCPD( domain,
                p1_5SliceGrModel->GetModelDomain() );
        }
        PNL_CHECK_IF_MEMORY_ALLOCATED( pUnitFactor );
        p1_5SliceGrModel->AttachFactor( pUnitFactor );
    }

    // Nodes of the current slice tie their distribution functions to the
    // corresponding factors of the source model.
    for( node = numberOfInterfaceNodes; node < nnodes; node++ )
    {
        domain.clear();
        p1_5SliceGraph->GetParents(node, &domain);
        domain.push_back(node);

        int num = nnodesPerSlice - numberOfInterfaceNodes + node;
        pFactor = GrModel()->GetFactor(num);
        if( pFactor->GetDistributionType() == dtMixGaussian )
        {
            floatVector prob;
            static_cast<CMixtureGaussianCPD *>(pFactor)->GetProbabilities(&prob);
            CMixtureGaussianCPD *pCPD = CMixtureGaussianCPD::Create( domain,
                p1_5SliceGrModel->GetModelDomain(), prob );
            pCPD->TieDistribFun(pFactor);
            p1_5SliceGrModel->AttachFactor(pCPD);
        }
        else
        {
            p1_5SliceGrModel->AllocFactor(node);
            p1_5SliceGrModel->GetFactor(node)->TieDistribFun(GrModel()->GetFactor(num));
        }
    }
    return p1_5SliceGrModel;
}
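The index arithmetic above maps a node of the 1.5-slice model back to the source model: the first numberOfInterfaceNodes nodes are interface copies, and every later node k corresponds to source index nnodesPerSlice - numberOfInterfaceNodes + k, which is where the tied factor is fetched from. Below is a hypothetical worked example of that mapping only; it assumes the 1.5-slice graph consists of the interface nodes followed by one full slice, and it is not library code.

#include <cstdio>

// Hypothetical sizes: 4 nodes per slice, 2 of them interface nodes.
// Under the assumption above, the 1.5-slice model has 2 + 4 = 6 nodes;
// nodes 2..5 then map to source indices 4..7, exactly as
// num = nnodesPerSlice - numberOfInterfaceNodes + node computes.
int main()
{
    const int nnodesPerSlice = 4;
    const int numberOfInterfaceNodes = 2;
    const int nnodes = numberOfInterfaceNodes + nnodesPerSlice;

    for( int node = numberOfInterfaceNodes; node < nnodes; node++ )
    {
        int num = nnodesPerSlice - numberOfInterfaceNodes + node;
        printf( "1.5-slice node %d <- source index %d\n", node, num );
    }
    return 0;
}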