// Creates a new CPD over `family` whose kind mirrors the distribution type of
// the factor currently attached to the family's child node in `pBNet`.
//
//   nfamily - number of entries in `family`
//   family  - node indices of the family; the last entry is used as the child
//   pBNet   - network that supplies the model domain and the child's factor
//
// Returns the freshly created CPD:
//   * dtTabular                 -> CTabularCPD, then CreateAllNecessaryMatrices(1)
//   * dtMixGaussian             -> CMixtureGaussianCPD created with the
//                                  probabilities read from the existing factor,
//                                  then CreateDefaultMatrices(1) on its
//                                  distribution function
//   * dtGaussian/dtCondGaussian -> CGaussianCPD, then CreateAllNecessaryMatrices(1)
// Any other distribution type raises CNotImplemented through PNL_THROW.
CCPD* CStaticStructLearnSEM::CreateRandomCPD(int nfamily, const int* family, CBNet* pBNet)
{
	const int childNode = family[nfamily-1];
	CModelDomain* pDomain = pBNet->GetModelDomain();
	CFactor* pCurrentFactor = pBNet->GetFactor(childNode);
	const EDistributionType distrType = pCurrentFactor->GetDistributionType();

	if( distrType == dtTabular )
	{
		CCPD* pTabular = CTabularCPD::Create(family, nfamily, pDomain);
		pTabular->CreateAllNecessaryMatrices(1);
		return pTabular;
	}

	if( distrType == dtMixGaussian )
	{
		// Reuse the mixture probabilities of the factor already in the net.
		floatVector mixProbs;
		static_cast<CMixtureGaussianCPD*>(pCurrentFactor)->GetProbabilities(&mixProbs);
		CCPD* pMixture = CMixtureGaussianCPD::Create(family, nfamily, pDomain, &mixProbs.front());
		static_cast<CCondGaussianDistribFun*>(pMixture->GetDistribFun())->CreateDefaultMatrices(1);
		return pMixture;
	}

	if( (distrType != dtGaussian) && (distrType != dtCondGaussian) )
	{
		PNL_THROW(CNotImplemented, "this type of distribution is not supported yet");
	}
	else
	{
		CCPD* pGaussian = CGaussianCPD::Create(family, nfamily, pDomain);
		pGaussian->CreateAllNecessaryMatrices(1);
		return pGaussian;
	}
}
Пример #2
0
int CBICLearningEngine::DimOfModel(const CStaticGraphicalModel *pModel)
{
/*
compute dimension of the model in (d)
it using in BIC criterion:
BIC = LogLic - 0.5*d*log(N)
    */
    int nParam = pModel->GetNumberOfFactors();
    CFactor *param = NULL;
    int dimOfModel = 0;
    int dim = 1;
    CMatrix<float> *matrix;;
    for (int  i = 0; i < nParam; i++)
    {
	dim = 1;
	param = pModel->GetFactor(i);
	switch (param->GetFactorType())
	{
	case ftCPD:
	    {
		switch (param->GetDistributionType())
		{
		case dtTabular:
		    {

			matrix = param->GetMatrix(matTable);
			int size;
			const int *ranges;
			static_cast<CNumericDenseMatrix<float>*>(matrix)->
			    GetRanges(&size, &ranges);
			for(int j=0; j < size - 1; j++)
			{
			    dim *= ranges[j];

			}
			dim *= ranges[size-1]-1;
			break;

		    }//case dtTabular
		case dtGaussian:
		    {
			PNL_THROW(CNotImplemented,"Gaussian")
			    break;
		    }//case dtGaussian
		case dtCondGaussian:
		    {
			PNL_THROW(CNotImplemented,"CondGaussian")
			    break;
		    }//case dtCondGaussian
		default:
		    {
			PNL_THROW(CBadConst,"distribution type")
			    break;
		    }
		}//swith(param->GetFactorType)
		break;
	    }//end case ftCPD
	case ftPotential:
	    {
		PNL_THROW(CNotImplemented,"Factor")
		    break;
	    }
	default:
	    {
		PNL_THROW(CBadConst,"FactorType")
		    break;
	    }
	}//end switch(param->GetFactor)

	dimOfModel += dim;
    }//end for(i)
    return dimOfModel;
}
Пример #3
0
// Learns the parameters of the static graphical model from the evidences that
// have not been processed yet (indices GetNumberProcEv() .. GetNumEv()-1).
//
// Two paths:
//   * IsAllObserved(): a single pass. Non-softmax factors accumulate ML
//     statistics, softmax factors are fitted through MaximumLikelihood(), and
//     the value returned by UpdateModel() is appended to m_critValue.
//   * otherwise: an iterative EM loop. Each iteration clears the statistics,
//     accumulates them over the unprocessed evidences (using the inference
//     engine for domains that are not fully observed), refits softmax factors,
//     calls UpdateModel() and appends its result to m_critValue. The loop ends
//     when GetMaxIterEM() is reached or the relative change of the criterion
//     is no larger than GetPrecisionEM().
//
// On exit all evidences are marked as processed via SetNumProcEv().
void CEMLearningEngine::Learn()
{
    CStaticGraphicalModel *pGrModel =  this->GetStaticModel();
    PNL_CHECK_IS_NULL_POINTER(pGrModel);
    // presumably requires at least one unprocessed evidence - TODO confirm macro semantics
    PNL_CHECK_LEFT_BORDER(GetNumEv() - GetNumberProcEv() , 1);
    
    // Use the engine already stored in m_pInfEngine; otherwise create a
    // junction-tree engine, but only when m_bAllObserved is false.
    CInfEngine *pInfEng = NULL;
    if (m_pInfEngine)
    {
        pInfEng = m_pInfEngine;
    }
    else
    {
        if (!m_bAllObserved)
        {
            pInfEng = CJtreeInfEngine::Create(pGrModel);
            m_pInfEngine = pInfEng;
        }
    }
    
    float loglik = 0.0f;
    
    int nFactors = pGrModel->GetNumberOfFactors();
    const CEvidence *pEv;
    CFactor *pFactor;
    
    int iteration = 0;
    int ev;

    // IsCastNeed becomes true as soon as one factor has a softmax distribution.
    bool IsCastNeed = false;
    int i;
    for( i = 0; i < nFactors; i++ )
    {
        pFactor = pGrModel->GetFactor(i);
        EDistributionType dt = pFactor->GetDistributionType();
        if ( dt == dtSoftMax ) IsCastNeed = true;
    }

    // full_evid is only built (and released at the end of this function)
    // when a softmax factor is present.
    float ** full_evid = NULL;
    if (IsCastNeed)
    {
        BuildFullEvidenceMatrix(&full_evid);
    }

    
    if (IsAllObserved())
    {
        // NOTE(review): this 'i' shadows the function-scope 'i' declared above.
        int i;
        float **evid = NULL;
        EDistributionType dt;
        CFactor *factor = NULL;
        for (i = 0; i < nFactors; i++)
        {
            factor = pGrModel->GetFactor(i);
            dt = factor->GetDistributionType();
            if (dt != dtSoftMax)
            {
                // ML statistics over the unprocessed evidences only.
                factor->UpdateStatisticsML(&m_Vector_pEvidences[GetNumberProcEv()], 
                    GetNumEv() - GetNumberProcEv());
            }
            else
            {
                
                // family = parents of node i followed by node i itself.
                intVector family;
				family.resize(0);
                pGrModel->GetGraph()->GetParents(i, &family);
                family.push_back(i);
                CSoftMaxCPD* SoftMaxFactor = static_cast<CSoftMaxCPD*>(factor);
                SoftMaxFactor->BuildCurrentEvidenceMatrix(&full_evid, 
					&evid,family,m_Vector_pEvidences.size());
				SoftMaxFactor->InitLearnData();
                SoftMaxFactor->SetMaximizingMethod(m_MaximizingMethod);
                // NOTE(review): uses the total number of stored evidences, whereas the
                // non-softmax branch uses only the unprocessed ones - confirm this is intended.
                SoftMaxFactor->MaximumLikelihood(evid, m_Vector_pEvidences.size(),
                    0.00001f, 0.01f);
                SoftMaxFactor->CopyLearnDataToDistrib();
                // assumes BuildCurrentEvidenceMatrix allocated one row per domain node - TODO confirm
                for (int k = 0; k < factor->GetDomainSize(); k++)
                {
                    delete [] evid[k];
                }
                delete [] evid;
            }
        }
        // Single pass: record the criterion value returned by UpdateModel().
        m_critValue.push_back(UpdateModel());
    }
    else
    {
        bool bContinue;
        const CPotential * pot;
        
/*        bool IsCastNeed = false;
        int i;
        for( i = 0; i < nFactors; i++ )
        {
            pFactor = pGrModel->GetFactor(i);
            EDistributionType dt = pFactor->GetDistributionType();
            if ( dt == dtSoftMax ) IsCastNeed = true;
        }

        float ** full_evid;
        if (IsCastNeed)
        {
            BuildFullEvidenceMatrix(full_evid);
        }*/
        
        // One EM iteration per pass of this loop.
        do
        {
            ClearStatisticData();
            iteration++;
            for( ev = GetNumberProcEv(); ev < GetNumEv() ; ev++ )
            {
                // Evidence is entered into the inference engine only when the
                // obs-flags container of this evidence is not empty.
                bool bInfIsNeed = !GetObsFlags(ev)->empty(); 
                pEv = m_Vector_pEvidences[ev];
                if( bInfIsNeed )
                {
                    pInfEng->EnterEvidence(pEv, 0, 0);
                }
                // NOTE(review): shadows the function-scope 'i'.
                int i;
                for( i = 0; i < nFactors; i++ )
                {
                    pFactor = pGrModel->GetFactor(i);
                    int nnodes;
                    const int * domain;
                    pFactor->GetDomain( &nnodes, &domain );
                    if( bInfIsNeed && !IsDomainObserved(nnodes, domain, ev ) )
                    {
                        // Domain not fully observed: query the marginal over the domain.
                        pInfEng->MarginalNodes( domain, nnodes, 1 );
                        pot = pInfEng->GetQueryJPD(); 
                        // presumably Cast() writes a value for unobserved node i into
                        // full_evid using the marginal - TODO confirm
                        if ( (!(m_Vector_pEvidences[ev])->IsNodeObserved(i)) && (IsCastNeed) )
                        {
                            Cast(pot, i, ev, &full_evid);
                        }
                        EDistributionType dt;
                        dt = pFactor->GetDistributionType();
                        if ( !(dt == dtSoftMax) )
                            pFactor->UpdateStatisticsEM( /*pInfEng->GetQueryJPD */ pot, pEv );
                    }
                    else
                    {
                        // Fully observed domain: ML statistics from this single evidence.
                        if ((pFactor->GetDistributionType()) != dtSoftMax)
                            pFactor->UpdateStatisticsML( &pEv, 1 );
                    }
                }
            }
            
            // NOTE(review): shadows the function-scope 'i'.
            int i;
/*
            printf ("\n My Full Evidence Matrix");
            for (i=0; i<nFactors; i++)
            {
                for (j=0; j<GetNumEv(); j++)
                {
                    printf ("%f   ", full_evid[i][j]);
                }
                printf("\n");
            } 
*/            
            // Softmax factors are skipped above and fitted here, once per iteration.
            float **evid = NULL;
            EDistributionType dt;
            CFactor *factor = NULL;
            // int i;
            for (i = 0; i < nFactors; i++)
            {
                factor = pGrModel->GetFactor(i);
                dt = factor->GetDistributionType();
                if (dt == dtSoftMax)
                {
					// family = parents of node i followed by node i itself.
					intVector family;
				    family.resize(0);
                    pGrModel->GetGraph()->GetParents(i, &family);
                    family.push_back(i);
                    CSoftMaxCPD* SoftMaxFactor = static_cast<CSoftMaxCPD*>(factor);
					SoftMaxFactor->BuildCurrentEvidenceMatrix(&full_evid, 
						&evid,family,m_Vector_pEvidences.size());
                    SoftMaxFactor->InitLearnData();
                    SoftMaxFactor->SetMaximizingMethod(m_MaximizingMethod);
                    //        SoftMaxFactor->MaximumLikelihood(evid, m_numberOfLastEvidences, 
                    SoftMaxFactor->MaximumLikelihood(evid, m_Vector_pEvidences.size(),
                        0.00001f, 0.01f);
                    SoftMaxFactor->CopyLearnDataToDistrib();
                    // assumes BuildCurrentEvidenceMatrix allocated one row per domain node - TODO confirm
                    for (int k = 0; k < factor->GetDomainSize(); k++)
                    {
                        delete [] evid[k];
                    }
                    delete [] evid;
                }
            }
                        
            loglik = UpdateModel();
            
            // Stopping rule: always continue after the first iteration; afterwards continue
            // while the relative change of the criterion exceeds GetPrecisionEM(). In both
            // cases the iteration count must stay below GetMaxIterEM().
            // NOTE(review): the denominator m_critValue.back() + loglik is not guarded against zero.
            if( GetMaxIterEM() != 1)
            {
                bool flag = iteration == 1 ? true : 
                (fabs(2*(m_critValue.back()-loglik)/(m_critValue.back() + loglik)) > GetPrecisionEM() );
                
                bContinue = GetMaxIterEM() > iteration && flag;
            }
            else
            {
                bContinue = false;
            }
            m_critValue.push_back(loglik);
            
        }while(bContinue);
    }
    // Mark every stored evidence as processed.
    SetNumProcEv( GetNumEv() );
   
    // Release full_evid.
    // assumes BuildFullEvidenceMatrix allocated one row per graph node - TODO confirm
    if (IsCastNeed)
    {
        int NumOfNodes = pGrModel->GetGraph()->GetNumberOfNodes();
        for (i=0; i<NumOfNodes; i++)
        {
            delete [] full_evid[i];
        }
        delete [] full_evid;
    }

}
// Builds the Bayesian network that lives on the graph produced by
// Create1_5SliceGraph().
//
// Node layout of the resulting network:
//   * nodes [0, numInterface) mirror the interface nodes of GrModel(); each
//     receives a unit-function CPD (tabular when the node type is discrete,
//     Gaussian otherwise);
//   * every remaining node `n` corresponds to node `sliceOffset + n` of
//     GrModel(), where sliceOffset = (nodes per slice) - numInterface. Its
//     factor shares the distribution function of that GrModel() factor via
//     TieDistribFun(); mixture-Gaussian factors are created explicitly with
//     the source factor's probabilities before being tied.
//
// The PNL_CHECK_* macros validate the graph allocation, the number of
// connectivity components (expression `comp.size() - 1`) and each unit factor.
// Returns the newly created network.
CBNet *C1_5SliceInfEngine::Create1_5SliceBNet()
{
    CGraph *p1_5SliceGraph = Create1_5SliceGraph();
    PNL_CHECK_IF_MEMORY_ALLOCATED(p1_5SliceGraph);

    intVecVector comp;
    p1_5SliceGraph->GetConnectivityComponents(&comp);
    PNL_CHECK_FOR_NON_ZERO(comp.size() -1 );

    const int totalNodes = p1_5SliceGraph->GetNumberOfNodes();

    // Gather what is needed from the source model.
    nodeTypeVector varTypes;
    GrModel()->GetModelDomain()->GetVariableTypes(&varTypes);
    const int *srcAssociations = GrModel()->GetNodeAssociations();

    int numInterface;
    const int *interfaceIds;
    GrModel()->GetInterfaceNodes(&numInterface, &interfaceIds);

    // Shift that maps a non-interface node of the new net onto GrModel().
    const int sliceOffset = GrModel()->GetNumberOfNodes() - numInterface;

    // Node-type association for every node of the new network.
    intVector sliceAssociations(totalNodes);
    for( int a = 0; a < numInterface; a++ )
    {
        sliceAssociations[a] = srcAssociations[interfaceIds[a]];
    }
    for( int b = numInterface; b < totalNodes; b++ )
    {
        sliceAssociations[b] = srcAssociations[sliceOffset + b];
    }

    CBNet *pSliceNet = CBNet::Create( totalNodes, varTypes.size(),
        &varTypes.front(), &sliceAssociations.front(), p1_5SliceGraph );
    pSliceNet->AllocFactors();

    intVector domain(1);

    // Interface nodes: single-node unit-function CPDs.
    for( int ifaceNode = 0; ifaceNode < numInterface; ifaceNode++ )
    {
        domain[0] = ifaceNode;

        CFactor *pUnitFactor;
        if( GrModel()->GetNodeType(interfaceIds[ifaceNode])->IsDiscrete() )
        {
            pUnitFactor = CTabularCPD::CreateUnitFunctionCPD( domain,
                pSliceNet->GetModelDomain());
        }
        else
        {
            pUnitFactor = CGaussianCPD::CreateUnitFunctionCPD( domain,
                pSliceNet->GetModelDomain());
        }
        PNL_CHECK_IF_MEMORY_ALLOCATED( pUnitFactor );

        pSliceNet->AttachFactor( pUnitFactor );
    }

    // Remaining nodes: factors tied to the matching factor of GrModel().
    for( int sliceNode = numInterface; sliceNode < totalNodes; sliceNode++ )
    {
        domain.clear();
        p1_5SliceGraph->GetParents(sliceNode, &domain);
        domain.push_back(sliceNode);

        const int srcNode = sliceOffset + sliceNode;
        CFactor *pSrcFactor = GrModel()->GetFactor(srcNode);

        if( pSrcFactor->GetDistributionType() != dtMixGaussian )
        {
            pSliceNet->AllocFactor(sliceNode);
            pSliceNet->GetFactor(sliceNode)->TieDistribFun(GrModel()->GetFactor(srcNode));
            continue;
        }

        // Mixture Gaussian: create the CPD with the source probabilities.
        floatVector prob;
        static_cast<CMixtureGaussianCPD *>(pSrcFactor)->GetProbabilities(&prob);
        CMixtureGaussianCPD *pMixCPD = CMixtureGaussianCPD::Create(domain, pSliceNet->GetModelDomain(), prob );
        pMixCPD->TieDistribFun(pSrcFactor);
        pSliceNet->AttachFactor(pMixCPD);
    }

    return pSliceNet;
}