// Builds m_pResultBNet from a learned structure.
//
// pDAG - the learned DAG. It is re-created through TopologicalCreateDAG, which
//        is also handed m_vResultRenaming; that vector is then used as the
//        "new index -> original index" map for node associations and evidence
//        values (presumably filled by TopologicalCreateDAG - confirm).
//
// The stored evidences are re-indexed into the new node order, one CPD per
// node is obtained from ComputeFactor for the family (parents + node) and
// attached to the new network. The temporary evidences are released on exit.
void CMlStaticStructLearn::CreateResultBNet(CDAG* pDAG)
{
    const int nodeCount = m_nNodes;

    CDAG* sortedDag = pDAG->TopologicalCreateDAG(m_vResultRenaming);

    nodeTypeVector nodeTypes;
    m_pGrModel->GetNodeTypes(&nodeTypes);

    // Node associations in the new order, plus the identity list of observed nodes.
    const int* sourceAssociations = m_pGrModel->GetNodeAssociations();
    intVector associations(nodeCount);
    intVector observedNodes(nodeCount);
    for (int n = 0; n < nodeCount; ++n)
    {
        associations[n] = sourceAssociations[m_vResultRenaming[n]];
        observedNodes[n] = n;
    }

    m_pResultBNet = CBNet::Create(nodeCount, nodeTypes.size(), &nodeTypes.front(),
                                  &associations.front(),
                                  static_cast<CGraph*>(sortedDag));

    // Re-index every stored evidence into the node order of the new network.
    const int evidenceCount = m_Vector_pEvidences.size();
    CEvidence** renamedEvidences = new CEvidence*[evidenceCount];
    valueVector values;
    for (int e = 0; e < evidenceCount; ++e)
    {
        for (int n = 0; n < nodeCount; ++n)
        {
            const Value* source = m_Vector_pEvidences[e]->GetValue(m_vResultRenaming[n]);
            const CNodeType* type = m_pResultBNet->GetNodeType(n);

            // A discrete node contributes one value, any other node GetNodeSize() values.
            const int valueCount = type->IsDiscrete() ? 1 : type->GetNodeSize();
            for (int v = 0; v < valueCount; ++v)
            {
                values.push_back(source[v]);
            }
        }
        renamedEvidences[e] = CEvidence::Create(m_pResultBNet, nodeCount,
                                                &observedNodes.front(), values);
        values.clear();
    }
    nodeTypes.clear();

    // One CPD per node, defined over its parents followed by the node itself.
    m_pResultBNet->AllocFactors();
    intVector family;
    for (int child = 0; child < nodeCount; ++child)
    {
        family.clear();
        sortedDag->GetParents(child, &family);
        family.push_back(child);
        CCPD* cpd = ComputeFactor(family, m_pResultBNet, renamedEvidences);
        m_pResultBNet->AttachFactor(cpd);
    }

    for (int e = 0; e < evidenceCount; ++e)
    {
        delete renamedEvidences[e];
    }
    delete[] renamedEvidences;
}
void CMlDynamicStructLearn::Learn() { RearrangeEvidences(); int nNodesSlice = m_pGrModel->GetNumberOfNodes(); intVector vAncestor, vDescent; for(int i=0; i<nNodesSlice; i++) { vAncestor.push_back(i); vDescent.push_back(i+nNodesSlice); } //currently only the hill climbing algorithm available CMlStaticStructLearn* pSSL = CMlStaticStructLearnHC::Create(m_pGrModel->GetStaticModel(), itStructLearnML, StructLearnHC, BIC, m_nMaxFanIn, vAncestor, vDescent, m_nRestarts); pSSL->SetData(m_vEvidences.size(), &m_vEvidences.front()); pSSL->SetMaxIterIPF(m_nMaxIters); static_cast<CMlStaticStructLearnHC*>(pSSL) ->SetMinProgress(m_minProgress); pSSL->Learn(); const CDAG* p2SDAG = pSSL->GetResultDAG(); // p2SDAG->Dump(); if(!m_LearnPriorSlice) { m_pResultDag = const_cast<CDAG*>(p2SDAG)->Clone(); delete pSSL; return; } intVector vA, vD; CStaticGraphicalModel* pGrModel0 = m_pGrModel->CreatePriorSliceGrModel(); CMlStaticStructLearn* pSSL0 = CMlStaticStructLearnHC::Create(pGrModel0, itStructLearnML, StructLearnHC, BIC, m_nMaxFanIn, vA, vD, m_nRestarts); pSSL0->SetData(m_vEvidence0.size(), &m_vEvidence0.front()); pSSL0->SetMaxIterIPF(m_nMaxIters / 2); static_cast<CMlStaticStructLearnHC*>(pSSL0) ->SetMinProgress(m_minProgress); pSSL0->Learn(); const CDAG* p0SDAG = pSSL0->GetResultDAG(); // p0SDAG->Dump(); CDAG* pDAG = const_cast<CDAG*>(p2SDAG)->Clone(); if(pDAG->SetSubDag(vAncestor, const_cast<CDAG*>(p0SDAG))) { m_pResultDag = pDAG->Clone(); delete pDAG; } else PNL_THROW(CInternalError, "InternalError, can not generate a DAG"); delete pSSL; delete pSSL0; }
void CMlStaticStructLearnHC::Learn() { if(m_Vector_pEvidences.size() == 0) PNL_THROW(CInconsistentState, "should set the data first"); CGraph* iGraph = m_pGrModel->GetGraph(); CDAG* iDAG = CDAG::Create(*iGraph); CDAG* pDAG; CDAG* pBestDAG = NULL; float BestScore = (float)-1e37; int irestarts = m_nRestarts; int i, istart; float score; for(istart=0; istart<irestarts; istart++) { if(istart>0) { delete iDAG; intVector vDiscrete, vContinuous; const CNodeType* nt; for(i=0; i<m_nNodes; i++) { nt = m_pGrModel->GetNodeType(i); if( nt->IsDiscrete() ) vDiscrete.push_back(i); else vContinuous.push_back(i); } iDAG = CDAG::RandomCreateADAG(m_nNodes, m_nMaxFanIn, vDiscrete, vContinuous); } LearnInOneStart(iDAG, &pDAG, &score); if(score > BestScore) { delete pBestDAG; pBestDAG = pDAG->Clone(); BestScore = score; } delete pDAG; } delete iDAG; m_pResultDAG = pBestDAG->Clone(); m_critValue.push_back(BestScore); delete pBestDAG; }
void CMlStaticStructLearnHC::LearnInOneStart(CDAG* InitDag, CDAG** LearnedDag, float* LearnedScore) { int i, j, step=0; bool progress = true; CDAG* iDAG = InitDag->Clone(); floatVector FamilyScore; float init_score = ScoreDAG(iDAG, &FamilyScore); int nValidMoves; EDGEOPVECTOR vValidMoves; EDGEOP move; intVector newFamily; intVector vAncestor, vDescent; intVector vDiscrete, vContinuous; int start, end, position; const CNodeType* nt; for(i=0; i<m_nNodes; i++) { nt = m_pGrModel->GetNodeType(i); if( nt->IsDiscrete() ) vDiscrete.push_back(i); else vContinuous.push_back(i); } vAncestor.assign(m_vAncestor.begin(), m_vAncestor.end()); vDescent.assign(m_vDescent.begin(), m_vDescent.end()); while ( step<m_nSteps && progress ) { iDAG->GetAllValidMove(&vValidMoves,&vDiscrete, &vContinuous, &vDescent, &vAncestor); nValidMoves = vValidMoves.size(); float tmp_score, max_score = 0.0f; float tmp_start, max_start = 0.0f; float tmp_end, max_end = 0.0f; int max_index = 0; for(i=0; i<nValidMoves; i++) { newFamily.clear(); move = vValidMoves[i]; switch (move.DAGChangeType) { case DAG_DEL : start = move.originalEdge.startNode; end = move.originalEdge.endNode; iDAG->GetParents(end, &newFamily); newFamily.push_back(end); position = std::find(newFamily.begin(), newFamily.end(), start) - newFamily.begin(); newFamily.erase(newFamily.begin()+position); tmp_score = ScoreFamily(newFamily) - FamilyScore[end]; if(tmp_score > max_score) { max_score = tmp_score; max_index = i; } break; case DAG_ADD : start = move.originalEdge.startNode; end = move.originalEdge.endNode; iDAG->GetParents(end, &newFamily); position = newFamily.size(); for(j=0; j<newFamily.size(); j++) { if(start<newFamily[j]) { position = j; break; } } if(position == int(newFamily.size())) newFamily.push_back(start); else newFamily.insert(newFamily.begin()+position, start); newFamily.push_back(end); if(newFamily.size() > (m_nMaxFanIn+1)) break; tmp_score = ScoreFamily(newFamily) - FamilyScore[end]; if(tmp_score > max_score) { max_score 
= tmp_score; max_index = i; } break; case DAG_REV : start = move.originalEdge.startNode; end = move.originalEdge.endNode; iDAG->GetParents(start, &newFamily); //add an edge position = newFamily.size(); for(j=0; j<newFamily.size(); j++) { if(end<newFamily[j]) { position = j; break; } } if(position == int(newFamily.size())) newFamily.push_back(end); else newFamily.insert(newFamily.begin()+position, end); newFamily.push_back(start); if(newFamily.size() > (m_nMaxFanIn+1)) break; tmp_score = ScoreFamily(newFamily) - FamilyScore[start]; tmp_start = tmp_score; start = move.originalEdge.startNode; end = move.originalEdge.endNode; iDAG->GetParents(end, &newFamily); newFamily.push_back(end); position = std::find(newFamily.begin(), newFamily.end(), start) - newFamily.begin(); newFamily.erase(newFamily.begin()+position); tmp_score = ScoreFamily(newFamily) - FamilyScore[end]; tmp_end = tmp_score; tmp_score = tmp_start + tmp_end; if(tmp_score > max_score) { max_score = tmp_score; max_start = tmp_start; max_end = tmp_end; max_index = i; } break; } } float score_gate = (float)fabs(m_minProgress * init_score); if(max_score <= score_gate) { vValidMoves.clear(); progress = false; break; } move = vValidMoves[max_index]; start = move.originalEdge.startNode; end = move.originalEdge.endNode; switch (move.DAGChangeType) { case DAG_DEL : if(iDAG->DoMove(start, end, DAG_DEL)) { init_score += max_score; FamilyScore[end] += max_score; } break; case DAG_ADD : if(iDAG->DoMove(start, end, DAG_ADD)) { init_score += max_score; FamilyScore[end] += max_score; } break; case DAG_REV : if(iDAG->DoMove(start, end, DAG_REV)) { init_score += max_score; FamilyScore[start] += max_start; FamilyScore[end] += max_end; } break; } vValidMoves.clear(); step++; } *LearnedScore = this->ScoreDAG(iDAG, &FamilyScore); *LearnedDag = iDAG->Clone(); delete iDAG; }
void CStaticStructLearnSEM::CreateNeighborCPDs(CBNet* pBNet, pCPDVector* vNeighborCPDs, EDGEOPVECTOR* vValidMoves, intVector* RevCorrespDel) { CGraph* pGraph = pBNet->GetGraph(); CDAG* pDAG = CDAG::Create(*pGraph); CModelDomain* pMD = pBNet->GetModelDomain(); intVector vDiscrete, vContinuous; intVector vAncestor, vDescent; intVector vMixture, vMix; const CNodeType* nt; CFactor* factor; int i, j, position; vAncestor.assign(m_vAncestor.begin(), m_vAncestor.end()); vDescent.assign(m_vDescent.begin(), m_vDescent.end()); pBNet->FindMixtureNodes(&vMix); for(i=0; i<vMix.size(); i++) { factor = pBNet->GetFactor(vMix[i]); j = static_cast<CMixtureGaussianCPD*>(factor) -> GetNumberOfMixtureNode(); vMixture.push_back(j); } for(i=0; i<m_nNodes; i++) { nt = pMD->GetVariableType(i); if( nt->IsDiscrete() ) { vDiscrete.push_back(i); } else vContinuous.push_back(i); } vValidMoves->clear(); vNeighborCPDs->clear(); RevCorrespDel->clear(); pDAG->GetAllValidMove(vValidMoves, &vMixture.front(), vMixture.size(), m_nMaxFanIn, &vDiscrete, &vContinuous, &vDescent, &vAncestor ); int nMoves = vValidMoves->size(); intVector domain; EDGEOP curMove; int start, end; for(i=0; i<nMoves; i++) { domain.clear(); curMove = (*vValidMoves)[i]; switch (curMove.DAGChangeType) { case DAG_DEL : start = curMove.originalEdge.startNode; end = curMove.originalEdge.endNode; factor = pBNet->GetFactor(end); factor->GetDomain(&domain); position = std::find(domain.begin(), domain.end(), start) - domain.begin(); domain.erase(domain.begin()+position); vNeighborCPDs->push_back(CreateRandomCPD(domain.size(), &domain.front(), pBNet)); break; case DAG_ADD : start = curMove.originalEdge.startNode; end = curMove.originalEdge.endNode; factor = pBNet->GetFactor(end); factor->GetDomain(&domain); domain.insert(domain.begin(), start); vNeighborCPDs->push_back(CreateRandomCPD(domain.size(), &domain.front(), pBNet)); break; case DAG_REV : end = curMove.originalEdge.startNode; start = curMove.originalEdge.endNode; factor = 
pBNet->GetFactor(end); factor->GetDomain(&domain); domain.insert(domain.begin(), start); vNeighborCPDs->push_back(CreateRandomCPD(domain.size(), &domain.front(), pBNet)); break; } } RevCorrespDel->assign(nMoves, -1); EDGEOP pre_move; for(i=0; i<nMoves; i++) { curMove = (*vValidMoves)[i]; if(curMove.DAGChangeType == DAG_REV) { start = curMove.originalEdge.startNode; end = curMove.originalEdge.endNode; for(j=0; j<nMoves; j++) { pre_move = (*vValidMoves)[j]; if( (start == pre_move.originalEdge.startNode) && (end == pre_move.originalEdge.endNode) && (pre_move.DAGChangeType == DAG_DEL) ) { (*RevCorrespDel)[i] = j; break; } } } } }
bool CStaticStructLearnSEM::LearnOneStep() { intVecVector decompsition; CGraph* graph = m_pCurrBNet->GetGraph(); graph->GetConnectivityComponents( &decompsition ); CEMLearningEngine* pEMLearn; if(decompsition.size() > 1) { CExInfEngine< CJtreeInfEngine, CBNet, PNL_EXINFENGINEFLAVOUR_DISCONNECTED > *pInf = CExInfEngine< CJtreeInfEngine, CBNet, PNL_EXINFENGINEFLAVOUR_DISCONNECTED >:: Create( m_pCurrBNet ); pEMLearn = CEMLearningEngine::Create(m_pCurrBNet, pInf); } else { CJtreeInfEngine *pInf = CJtreeInfEngine::Create(m_pCurrBNet); pEMLearn = CEMLearningEngine::Create(m_pCurrBNet, pInf); } int i; for(i=0; i<decompsition.size(); i++) decompsition[i].clear(); decompsition.clear(); ConvertToCurrEvidences(m_pCurrBNet); pEMLearn->SetData(m_numberOfAllEvidences, &m_vCurrEvidences.front()); pEMLearn->SetMaxIterEM(m_IterEM); // pEMLearn->ClearStatisticData(); pCPDVector vNeighborCPDs; floatVector vNeighborLLs; EDGEOPVECTOR vValidMoves; intVector vRevCorrespDel; CreateNeighborCPDs(m_pCurrBNet, &vNeighborCPDs, &vValidMoves, &vRevCorrespDel); pEMLearn->LearnExtraCPDs(m_nMaxFanIn+1, &vNeighborCPDs, &vNeighborLLs); // m_pCurrBNet = static_cast<CBNet*>(pEMLearn->GetStaticModel()); const float* familyLL = pEMLearn->GetFamilyLogLik(); floatVector familyScores(m_nNodes,0); int j, freeparams; float logebase = (float)log(float(m_numberOfAllEvidences)); float total_score = 0.0f; CFactor* pCPD; for(i=0; i<m_nNodes; i++) { pCPD = m_pCurrBNet->GetFactor(i); freeparams = pCPD->GetNumberOfFreeParameters(); familyScores[i] = familyLL[i] - 0.5f * float(freeparams) * logebase; total_score += familyScores[i]; } int nMoves = vValidMoves.size(); floatVector neighborScores(nMoves, 0); for(i=0; i<nMoves; i++) { pCPD = static_cast<CFactor*>(vNeighborCPDs[i]); freeparams = pCPD->GetNumberOfFreeParameters(); neighborScores[i] = vNeighborLLs[i] - 0.5f * float(freeparams) * logebase; } int start, end, max_position=0; float tmp_score, best_score = -1e37f; EDGEOP move; for(i=0; i<nMoves; i++) { move = 
vValidMoves[i]; switch (move.DAGChangeType) { case DAG_DEL : end = move.originalEdge.endNode; tmp_score = neighborScores[i] - familyScores[end]; if( best_score<tmp_score ) { best_score = tmp_score; max_position = i; } break; case DAG_ADD : end = move.originalEdge.endNode; tmp_score = neighborScores[i] - familyScores[end]; if( best_score<tmp_score ) { best_score = tmp_score; max_position = i; } break; case DAG_REV : end = move.originalEdge.startNode; tmp_score = neighborScores[i] - familyScores[end]; end = move.originalEdge.endNode; tmp_score += neighborScores[vRevCorrespDel[i]] - familyScores[end]; if( best_score<tmp_score ) { best_score = tmp_score; max_position = i; } break; } } move = vValidMoves[max_position]; start = move.originalEdge.startNode; end = move.originalEdge.endNode; EDAGChangeType changeType = move.DAGChangeType; CCPD *addCPD=0, *delCPD=0; switch (changeType) { case DAG_DEL : delCPD = static_cast<CCPD*>((vNeighborCPDs[max_position])->Clone()); break; case DAG_ADD : addCPD = static_cast<CCPD*>((vNeighborCPDs[max_position])->Clone()); break; case DAG_REV : addCPD = static_cast<CCPD*>((vNeighborCPDs[max_position])->Clone()); delCPD = static_cast<CCPD*>((vNeighborCPDs[vRevCorrespDel[max_position]])->Clone()); break; } delete pEMLearn; for(i=0; i<vNeighborCPDs.size(); i++) { delete vNeighborCPDs[i]; } vNeighborCPDs.clear(); for(i=0; i<m_numberOfAllEvidences; i++) { delete m_vCurrEvidences[i]; } m_vCurrEvidences.clear(); vValidMoves.clear(); float score_gate = (float)fabs(m_minProgress * total_score); if(best_score <= score_gate) { if(changeType == DAG_REV) { delete addCPD; delete delCPD; } if(changeType == DAG_ADD)delete addCPD; if(changeType == DAG_DEL)delete delCPD; return false; } total_score += best_score; CDAG* pDAG = CDAG::Create(*(m_pCurrBNet->GetGraph())); int node, node1, newnode; if(!(pDAG->DoMove(start, end, changeType))) { PNL_THROW(CInternalError, "There are some internal errors"); } intVector vRenaming, Old2New; CDAG* iDAG; int 
TopologicSorted = pDAG->IsTopologicallySorted(); if( TopologicSorted ) { iDAG = pDAG->Clone(); for(i=0; i<m_nNodes; i++) vRenaming.push_back(i); } else iDAG = pDAG->TopologicalCreateDAG(vRenaming); pDAG->Dump(); intVector gRename; for(i=0; i<m_nNodes; i++) { node = vRenaming[i]; node1 = m_vGlobalRenaming[node]; gRename.push_back(node1); } m_vGlobalRenaming.assign(gRename.begin(), gRename.end()); int pos; for(i=0; i<m_nNodes; i++) { pos = std::find(vRenaming.begin(), vRenaming.end(), i) - vRenaming.begin(); Old2New.push_back(pos); } const int* oldNodeAsso = m_pCurrBNet->GetNodeAssociations(); intVector newNodeAsso(m_nNodes,0); for(i=0; i<m_nNodes; i++) { newNodeAsso[i] = oldNodeAsso[vRenaming[i]]; } nodeTypeVector vpnt; m_pCurrBNet->GetNodeTypes(&vpnt); CBNet* pBNet = CBNet::Create(m_nNodes, vpnt.size(), &vpnt.front(), &newNodeAsso.front(), static_cast<CGraph*>(iDAG)); CModelDomain* pMDnew = pBNet->GetModelDomain(); pBNet->AllocFactors(); intVector domainNew, domainOld; const CFactor* factor=0; CFactor* curFactor; for(i=0; i<m_nNodes; i++) { domainNew.clear(); newnode = Old2New[i]; if( (i != start) && (i != end) ) { factor = m_pCurrBNet->GetFactor(i); } else { if(changeType == DAG_REV) { if(i == start) factor = addCPD->Clone(); if(i == end) factor = delCPD->Clone(); } if(changeType == DAG_DEL) { if(i == start) factor = m_pCurrBNet->GetFactor(i); if(i == end) factor = delCPD->Clone(); } if(changeType == DAG_ADD) { if(i == start) factor = m_pCurrBNet->GetFactor(i); if(i == end) factor = addCPD->Clone(); } } factor->GetDomain(&domainOld); for(j=0; j<domainOld.size(); j++) { domainNew.push_back(Old2New[domainOld[j]]); } curFactor = CFactor::CopyWithNewDomain(factor, domainNew, pMDnew); pBNet->AttachFactor(curFactor); } if(changeType == DAG_REV) { delete addCPD; delete delCPD; } if(changeType == DAG_ADD)delete addCPD; if(changeType == DAG_DEL)delete delCPD; delete m_pCurrBNet; delete pDAG; m_pCurrBNet = pBNet; m_critValue.push_back(total_score); return true; }
int main() { int i, j; CBNet* pAlarm = CreateAlarmBNet(); const int nnodes = pAlarm->GetNumberOfNodes(); pAlarm->GetGraph()->Dump(); CGraph *graph = CGraph::Create( nnodes, NULL, NULL, NULL ); for(i=0; i<nnodes-1; i++) { graph->AddEdge(i, i+1, 1); } CBNet *bnet = CBNet::CreateWithRandomMatrices( graph, pAlarm->GetModelDomain() ); int nEv = 50; CEvidence **pEvidences = new CEvidence *[nEv]; int dataSize = nnodes;//summ all sizes int* obs_nodes = new int[nnodes]; for(i=0; i<nnodes; i++)obs_nodes[i] = i; //read data from file alarm.dat to evidences FILE * fp = fopen("../../examples/Data/alarm.dat", "r"); if( !fp && !(fp = fopen("../../c_pgmtk/examples/Data/alarm.dat", "r")) ) { std::cout<<"can't open cases file"<<std::endl; exit(1); } valueVector input_data; input_data.resize(dataSize); for(i = 0 ; i < nEv; i++) { for (j = 0; j < dataSize ; j++) { int val; fscanf(fp, "%d,", &val); input_data[j].SetInt(val); } pEvidences[i] = CEvidence::Create(pAlarm, nnodes, obs_nodes, input_data); } fclose(fp); intVector vA; intVector vD; CMlStaticStructLearn *pLearn = CMlStaticStructLearnHC::Create(bnet, itStructLearnML, StructLearnHC, BIC, 5, vA, vD, 1); pLearn->SetData(nEv, pEvidences); pLearn->Learn(); // pLearn->CreateResultBNet(const_cast<CDAG*>(pLearn->GetResultDAG())); // const CBNet* pBNet = pLearn->GetResultBNet(); /////////////////////////////////////////////////////////////////////////////// const CDAG* pDAG = pLearn->GetResultDAG(); CDAG* iDAG = CDAG::Create(*(pAlarm->GetGraph())); int diff = iDAG->SymmetricDifference(pDAG); pDAG->Dump(); delete pLearn; delete iDAG; delete[] obs_nodes; delete pAlarm; delete bnet; for( i = 0; i < nEv; i++) { delete pEvidences[i]; } return 1; }