Ejemplo n.º 1
0
void CMlStaticStructLearnHC::LearnInOneStart(CDAG* InitDag, 
					CDAG** LearnedDag, float* LearnedScore)
{
	int		i, j, step=0;
	bool	progress = true;
	CDAG*	iDAG = InitDag->Clone();
	floatVector FamilyScore;
	float init_score = ScoreDAG(iDAG, &FamilyScore);
	int			nValidMoves;
	EDGEOPVECTOR vValidMoves;
	EDGEOP      move;
	intVector   newFamily;
	intVector	vAncestor, vDescent;
	intVector	vDiscrete, vContinuous;
	int			start, end, position;
	const CNodeType* nt;
	for(i=0; i<m_nNodes; i++)
	{
		nt = m_pGrModel->GetNodeType(i);
		if( nt->IsDiscrete() )
			vDiscrete.push_back(i);
		else
			vContinuous.push_back(i);
	}
	vAncestor.assign(m_vAncestor.begin(), m_vAncestor.end());
	vDescent.assign(m_vDescent.begin(), m_vDescent.end());
	
	while ( step<m_nSteps && progress )
	{
		iDAG->GetAllValidMove(&vValidMoves,&vDiscrete, &vContinuous, &vDescent, &vAncestor);
		nValidMoves = vValidMoves.size();
		float tmp_score, max_score = 0.0f;
		float tmp_start, max_start = 0.0f;
		float tmp_end, max_end = 0.0f;
		int   max_index = 0;
		for(i=0; i<nValidMoves; i++) 
		{
			newFamily.clear();
			move = vValidMoves[i];
			switch (move.DAGChangeType)
			{
			case DAG_DEL : 
				start = move.originalEdge.startNode;
				end = move.originalEdge.endNode;
				iDAG->GetParents(end, &newFamily);
				newFamily.push_back(end);
				position = std::find(newFamily.begin(), newFamily.end(), start)
					       - newFamily.begin();
				newFamily.erase(newFamily.begin()+position);

				tmp_score = ScoreFamily(newFamily) - FamilyScore[end];
				if(tmp_score > max_score)
				{
					max_score = tmp_score;
					max_index = i;
				}
				break;

			case DAG_ADD :
				start = move.originalEdge.startNode;
				end = move.originalEdge.endNode;
				iDAG->GetParents(end, &newFamily);
				position = newFamily.size();
				for(j=0; j<newFamily.size(); j++)
				{
					if(start<newFamily[j])
					{
						position = j;
						break;
					}
				}
				if(position == int(newFamily.size()))
					newFamily.push_back(start);
				else
					newFamily.insert(newFamily.begin()+position, start);		
				newFamily.push_back(end);
				if(newFamily.size() > (m_nMaxFanIn+1))
					break;
				tmp_score = ScoreFamily(newFamily) - FamilyScore[end];
				if(tmp_score > max_score)
				{
					max_score = tmp_score;
					max_index = i;
				}
				break;

			case DAG_REV :
				start = move.originalEdge.startNode;
				end = move.originalEdge.endNode;
				iDAG->GetParents(start, &newFamily); //add an edge
				position = newFamily.size();
				for(j=0; j<newFamily.size(); j++)
				{
					if(end<newFamily[j])
					{
						position = j;
						break;
					}
				}
				if(position == int(newFamily.size()))
					newFamily.push_back(end);
				else
					newFamily.insert(newFamily.begin()+position, end);		
				newFamily.push_back(start);
				if(newFamily.size() > (m_nMaxFanIn+1))
					break;
				tmp_score = ScoreFamily(newFamily) - FamilyScore[start];
				tmp_start = tmp_score;

				start = move.originalEdge.startNode;
				end = move.originalEdge.endNode;
				iDAG->GetParents(end, &newFamily);
				newFamily.push_back(end);
				position = std::find(newFamily.begin(), newFamily.end(), start)
					       - newFamily.begin();
				newFamily.erase(newFamily.begin()+position);

				tmp_score = ScoreFamily(newFamily) - FamilyScore[end];
				tmp_end = tmp_score;
				tmp_score = tmp_start + tmp_end;
				if(tmp_score > max_score)
				{
					max_score = tmp_score;
					max_start = tmp_start;
					max_end   = tmp_end;
					max_index = i;
				}
				break;
			}
		}

		float score_gate = (float)fabs(m_minProgress * init_score);
		if(max_score <= score_gate)
		{
			vValidMoves.clear();
			progress = false;
			break;
		}

		move = vValidMoves[max_index];
		start = move.originalEdge.startNode;
		end = move.originalEdge.endNode;
		switch (move.DAGChangeType)
		{
		case DAG_DEL :
			if(iDAG->DoMove(start, end, DAG_DEL))
			{ 
				init_score += max_score;
				FamilyScore[end] += max_score;
			}
			break;

		case DAG_ADD :
			if(iDAG->DoMove(start, end, DAG_ADD))
			{
				init_score += max_score;
				FamilyScore[end] += max_score;
			}
			break;

		case DAG_REV :
			if(iDAG->DoMove(start, end, DAG_REV))
			{
				init_score += max_score;
				FamilyScore[start] += max_start;
				FamilyScore[end] += max_end;
			}
			break;
		}
		vValidMoves.clear();
		step++;
	}

	*LearnedScore = this->ScoreDAG(iDAG, &FamilyScore);
	*LearnedDag = iDAG->Clone();
	delete iDAG;
}
// Performs one structure-search step on m_pCurrBNet.
//
// The CPDs of all neighbouring structures (one edge deleted, added or
// reversed) are learned with EM, each candidate is scored as
//     log-likelihood - 0.5 * free parameters * log(number of evidences),
// and the move with the largest score gain is selected.  If the gain exceeds
// |m_minProgress * total score| the move is applied: a new network is built
// on the (topologically re-sorted) graph, m_pCurrBNet is replaced by it, the
// new total score is appended to m_critValue and true is returned.
// Otherwise the current network is kept and false is returned.
//
// Throws CInternalError (via PNL_THROW) when the selected move cannot be
// applied to the graph.
bool CStaticStructLearnSEM::LearnOneStep()
{
	// A graph with more than one connectivity component gets the
	// "disconnected" flavour of the junction tree engine.
	// NOTE(review): pInf is never deleted in this function - confirm that
	// CEMLearningEngine takes ownership of the engine passed to Create().
	intVecVector decompsition;
	CGraph* graph = m_pCurrBNet->GetGraph();
	graph->GetConnectivityComponents( &decompsition );
	CEMLearningEngine* pEMLearn;
	if(decompsition.size() > 1)
	{
		CExInfEngine< CJtreeInfEngine, CBNet, PNL_EXINFENGINEFLAVOUR_DISCONNECTED > *pInf = 
			CExInfEngine< CJtreeInfEngine, CBNet, PNL_EXINFENGINEFLAVOUR_DISCONNECTED >::
			Create( m_pCurrBNet );
		pEMLearn = CEMLearningEngine::Create(m_pCurrBNet, pInf);
	}
	else
	{
		CJtreeInfEngine *pInf = CJtreeInfEngine::Create(m_pCurrBNet);
		pEMLearn = CEMLearningEngine::Create(m_pCurrBNet, pInf);
	}
	decompsition.clear();

	ConvertToCurrEvidences(m_pCurrBNet);
	pEMLearn->SetData(m_numberOfAllEvidences, &m_vCurrEvidences.front());
	pEMLearn->SetMaxIterEM(m_IterEM);

	// Candidate CPDs for every valid move.  vRevCorrespDel[i] is used below
	// as the index of the second CPD belonging to reversal move i.
	pCPDVector vNeighborCPDs;
	floatVector vNeighborLLs;
	EDGEOPVECTOR vValidMoves;
	intVector vRevCorrespDel;
	CreateNeighborCPDs(m_pCurrBNet, &vNeighborCPDs, &vValidMoves, &vRevCorrespDel);

	pEMLearn->LearnExtraCPDs(m_nMaxFanIn+1, &vNeighborCPDs, &vNeighborLLs);

	// Penalised scores of the current families and of the candidates.
	const float* familyLL = pEMLearn->GetFamilyLogLik();
	floatVector familyScores(m_nNodes,0);
	int i, j, freeparams;
	float logebase = (float)log(float(m_numberOfAllEvidences));
	float total_score = 0.0f;
	CFactor* pCPD;
	for(i=0; i<m_nNodes; i++)
	{
		pCPD = m_pCurrBNet->GetFactor(i);
		freeparams = pCPD->GetNumberOfFreeParameters();
		familyScores[i] = familyLL[i] - 0.5f * float(freeparams) * logebase;
		total_score += familyScores[i];
	}
	const int nMoves = int(vValidMoves.size());
	floatVector neighborScores(nMoves, 0);
	for(i=0; i<nMoves; i++)
	{
		pCPD = static_cast<CFactor*>(vNeighborCPDs[i]);
		freeparams = pCPD->GetNumberOfFreeParameters();
		neighborScores[i] = vNeighborLLs[i] - 0.5f * float(freeparams) * logebase;
	}

	// Find the move with the largest score gain.
	int start = 0, end = 0, max_position = 0;
	float tmp_score, best_score = -1e37f; 
	EDGEOP move;
	for(i=0; i<nMoves; i++)
	{
		move = vValidMoves[i];
		switch (move.DAGChangeType)
		{
		case DAG_DEL : 
		case DAG_ADD :
			// Only the family of the edge's end node changes.
			end = move.originalEdge.endNode;
			tmp_score = neighborScores[i] - familyScores[end];
			if( best_score<tmp_score )
			{
				best_score = tmp_score;
				max_position = i;
			}
			break;

		case DAG_REV :
			// Both end points change: candidate i scores the start node's
			// new family, candidate vRevCorrespDel[i] the end node's.
			end = move.originalEdge.startNode;
			tmp_score = neighborScores[i] - familyScores[end];
			
			end = move.originalEdge.endNode;
			tmp_score += neighborScores[vRevCorrespDel[i]] - familyScores[end];
			if( best_score<tmp_score )
			{
				best_score = tmp_score;
				max_position = i;
			}
			break;
		}
	}

	// Copy the CPD(s) of the winning move before the candidates are freed.
	// With an empty move list there is nothing to index, so skip this and
	// report "no progress" below.
	const bool haveMove = (nMoves > 0);
	EDAGChangeType changeType = DAG_ADD;	// placeholder, unused without a move
	CCPD *addCPD=0, *delCPD=0;
	if( haveMove )
	{
		move = vValidMoves[max_position];
		start = move.originalEdge.startNode;
		end = move.originalEdge.endNode;
		changeType = move.DAGChangeType;
		switch (changeType)
		{
		case DAG_DEL : 
			delCPD = static_cast<CCPD*>((vNeighborCPDs[max_position])->Clone());
			break;

		case DAG_ADD :
			addCPD = static_cast<CCPD*>((vNeighborCPDs[max_position])->Clone());
			break;

		case DAG_REV :
			addCPD = static_cast<CCPD*>((vNeighborCPDs[max_position])->Clone());
			delCPD = static_cast<CCPD*>((vNeighborCPDs[vRevCorrespDel[max_position]])->Clone());			
			break;
		}
	}

	// Release everything that was only needed for scoring.
	delete pEMLearn;
	for(i=0; i<int(vNeighborCPDs.size()); i++)
	{
		delete vNeighborCPDs[i];
	}
	vNeighborCPDs.clear();
	for(i=0; i<m_numberOfAllEvidences; i++)
	{
		delete m_vCurrEvidences[i];
	}
	m_vCurrEvidences.clear();
	vValidMoves.clear();

	const float score_gate = (float)fabs(m_minProgress * total_score);
	if( !haveMove || best_score <= score_gate )
	{
		// Deleting a null pointer is a no-op, so no per-type check is needed.
		delete addCPD;
		delete delCPD;
		return false;
	}

	total_score += best_score;
	CDAG* pDAG = CDAG::Create(*(m_pCurrBNet->GetGraph()));
	if(!(pDAG->DoMove(start, end, changeType)))
	{
		// Do not leak the temporaries when bailing out.
		delete pDAG;
		delete addCPD;
		delete delCPD;
		PNL_THROW(CInternalError, "There are some internal errors");
	}

	// vRenaming[newIndex] == oldIndex.  It is the identity when the modified
	// graph is already topologically sorted.
	intVector vRenaming, Old2New;
	CDAG* iDAG;
	int TopologicSorted = pDAG->IsTopologicallySorted();
	if( TopologicSorted )
	{
		iDAG = pDAG->Clone();
		for(i=0; i<m_nNodes; i++) vRenaming.push_back(i);
	}
	else
		iDAG = pDAG->TopologicalCreateDAG(vRenaming);
	// NOTE(review): looks like leftover diagnostic output - kept so the
	// observable behaviour does not change.
	pDAG->Dump();

	// Compose this step's renaming with the accumulated global renaming.
	intVector gRename;
	for(i=0; i<m_nNodes; i++)
	{
		gRename.push_back(m_vGlobalRenaming[vRenaming[i]]);
	}
	m_vGlobalRenaming.assign(gRename.begin(), gRename.end());

	// Inverse mapping: Old2New[oldIndex] == newIndex.
	int pos;
	for(i=0; i<m_nNodes; i++)
	{
		pos = std::find(vRenaming.begin(), vRenaming.end(), i) - vRenaming.begin();
		Old2New.push_back(pos);
	}

	// Build the new network on the re-sorted graph.
	const int* oldNodeAsso = m_pCurrBNet->GetNodeAssociations();
	intVector newNodeAsso(m_nNodes,0);
	for(i=0; i<m_nNodes; i++)
	{
		newNodeAsso[i] = oldNodeAsso[vRenaming[i]];
	}
	nodeTypeVector vpnt;
	m_pCurrBNet->GetNodeTypes(&vpnt);
	CBNet* pBNet = CBNet::Create(m_nNodes, vpnt.size(), &vpnt.front(), 
		           &newNodeAsso.front(), static_cast<CGraph*>(iDAG));
	CModelDomain* pMDnew = pBNet->GetModelDomain();
	pBNet->AllocFactors();
	intVector domainNew, domainOld;
	const CFactor* factor=0;
	CFactor* curFactor;
	for(i=0; i<m_nNodes; i++)
	{
		// Pick the source factor for old node i.  The end node of the edge
		// always gets a newly learned CPD, the start node only for a
		// reversal; every other node keeps its current factor.  The source
		// is only read by CopyWithNewDomain(), so the learned CPDs are used
		// directly - cloning them here leaked the clones.
		if( i == end )
			factor = (changeType == DAG_ADD) ? addCPD : delCPD;
		else if( (i == start) && (changeType == DAG_REV) )
			factor = addCPD;
		else
			factor = m_pCurrBNet->GetFactor(i);

		// Translate the factor's domain to the new node numbering.
		domainNew.clear();
		factor->GetDomain(&domainOld);
		for(j=0; j<int(domainOld.size()); j++)
		{
			domainNew.push_back(Old2New[domainOld[j]]);
		}
		curFactor = CFactor::CopyWithNewDomain(factor, domainNew, pMDnew);
		pBNet->AttachFactor(curFactor);
	}

	delete addCPD;
	delete delCPD;

	delete m_pCurrBNet;
	delete pDAG;
	m_pCurrBNet = pBNet;
	m_critValue.push_back(total_score);
	return true;
}