// Creates a CPD of the appropriate type for the given family (parent nodes
// followed by the child node, which is the last entry) by dispatching on the
// distribution type of the child's factor in the given network.
CCPD* CStaticStructLearnSEM::CreateRandomCPD(int nfamily, const int* family, CBNet* pBNet)
{
	int child = family[nfamily-1];
	CModelDomain* pMD = pBNet->GetModelDomain();
	CFactor* factor = pBNet->GetFactor(child);
	EDistributionType dt = factor->GetDistributionType();
	CCPD* pCPD;

	if( dt == dtTabular )
	{
		pCPD = CTabularCPD::Create(family, nfamily, pMD);
		pCPD->CreateAllNecessaryMatrices(1);
		return pCPD;
	}
	else if( dt == dtMixGaussian )
	{
		floatVector data;
		static_cast<CMixtureGaussianCPD*>(factor)->GetProbabilities(&data);
		pCPD = CMixtureGaussianCPD::Create(family, nfamily, pMD, &data.front());
		static_cast<CCondGaussianDistribFun*>(pCPD->GetDistribFun())->CreateDefaultMatrices(1);
		return pCPD;
	}
	else if( (dt == dtGaussian) || (dt == dtCondGaussian) )
	{
		pCPD = CGaussianCPD::Create(family, nfamily, pMD);
		pCPD->CreateAllNecessaryMatrices(1);
		return pCPD;
	}
	else
	{
		PNL_THROW(CNotImplemented, "this type of distribution is not supported yet");
	}
}
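// Builds a CPD for the given family and fits it to the stored evidences: plain
// ML statistics by default, Bayesian pseudo-count updating when the score method
// is MarLh, and gradient-based maximum likelihood for SoftMax nodes.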
CCPD*
CMlStaticStructLearn::ComputeFactor(intVector vFamily, CGraphicalModel* pGrModel, CEvidence** pEvidences)
{
    int nFamily = vFamily.size();
    int DomainSize;
    const int * domain;
    const CEvidence * pEv;
    int i;
    CTabularDistribFun *pDistribFun;
    CCPD* iCPD = this->CreateRandomCPD(nFamily,
                                       &vFamily.front(), pGrModel);
    int ncases = m_Vector_pEvidences.size();
    if( iCPD->GetDistributionType() != dtSoftMax )
    {
        if (m_ScoreMethod != MarLh)
        {
            iCPD->UpdateStatisticsML( pEvidences, ncases );
            iCPD->ProcessingStatisticalData(ncases);
        }
        else
        {
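            // Marginal-likelihood path: start from pseudo counts (m_K2alfa), fold
            // in every evidence case via BayesUpdateFactor, then turn the resulting
            // counts into a CPD.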
            iCPD->GetDomain(&DomainSize, &domain);

            pDistribFun = static_cast<CTabularDistribFun *>(iCPD->GetDistribFun());

            pDistribFun->InitPseudoCounts(m_K2alfa);

            for (i=0; i<ncases; i++)
            {
                pEv = m_Vector_pEvidences[i];
                const CEvidence *pEvidences[] = { pEv };
                pDistribFun->BayesUpdateFactor(pEvidences, 1, domain);
            }
            pDistribFun->PriorToCPD();
        }
    }
    else
    {
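        // SoftMax CPDs are fitted differently: build a dense evidence matrix and
        // run gradient-based maximum likelihood, then release the temporary
        // matrices.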
        float **evid = NULL;
        float **full_evid = NULL;
        BuildFullEvidenceMatrix(&full_evid);
        CSoftMaxCPD* SoftMaxFactor = static_cast<CSoftMaxCPD*>(iCPD);
        SoftMaxFactor->BuildCurrentEvidenceMatrix(&full_evid, &evid,vFamily,
                m_Vector_pEvidences.size());
        SoftMaxFactor->InitLearnData();
        SoftMaxFactor->SetMaximizingMethod(mmGradient);
        SoftMaxFactor->MaximumLikelihood(evid, m_Vector_pEvidences.size(),
                                         0.00001f, 0.01f);
        SoftMaxFactor->CopyLearnDataToDistrib();
        for (int k = 0; k < SoftMaxFactor->GetDomainSize(); k++)
        {
            delete [] evid[k];
        }
        delete [] evid;
        intVector obsNodes;
        m_Vector_pEvidences[0]->GetAllObsNodes(&obsNodes);
        for( i = 0; i < int(obsNodes.size()); i++ )
        {
            delete [] full_evid[i];
        }
        delete [] full_evid;
    }
    return iCPD;
}
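// Scores a candidate family (parents followed by the child node) against the
// stored evidences using the configured score method (MaxLh, PreAs or MarLh)
// and subtracts the complexity penalty selected by m_ScoreType (BIC/AIC).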
float CMlStaticStructLearn::ComputeFamilyScore(intVector vFamily)
{
    int nFamily = vFamily.size();
    CCPD* iCPD = this->CreateRandomCPD(nFamily, &vFamily.front(), m_pGrModel);
    CTabularDistribFun *pDistribFun;
    int ncases = m_Vector_pEvidences.size();
    const CEvidence * pEv;
    float score;
    float pred = 0;
    EDistributionType NodeType;
    switch (m_ScoreMethod)
    {
    case MaxLh :
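        // Maximum-likelihood score: sufficient statistics for tabular/Gaussian
        // families, gradient-based fitting plus an explicit likelihood evaluation
        // for (conditional) SoftMax families.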
        if ( !((iCPD->GetDistribFun()->GetDistributionType() == dtSoftMax)
                || (iCPD->GetDistribFun()->GetDistributionType() == dtCondSoftMax)))
        {
            iCPD->UpdateStatisticsML( &m_Vector_pEvidences.front(), ncases );
            score = iCPD->ProcessingStatisticalData(ncases);
        }
        else
        {
            float **evid = NULL;
            float **full_evid = NULL;
            BuildFullEvidenceMatrix(&full_evid);
            CSoftMaxCPD* SoftMaxFactor = static_cast<CSoftMaxCPD*>(iCPD);
            SoftMaxFactor->BuildCurrentEvidenceMatrix(&full_evid, &evid,
                    vFamily,m_Vector_pEvidences.size());
            SoftMaxFactor->InitLearnData();
            SoftMaxFactor->SetMaximizingMethod(mmGradient);
            SoftMaxFactor->MaximumLikelihood(evid, m_Vector_pEvidences.size(),
                                             0.00001f, 0.01f);
            SoftMaxFactor->CopyLearnDataToDistrib();
            if (SoftMaxFactor->GetDistribFun()->GetDistributionType() == dtSoftMax)
            {
                score = static_cast<CSoftMaxDistribFun*>(SoftMaxFactor->GetDistribFun())->CalculateLikelihood(evid, ncases);
            }
            else
            {
                score = static_cast<CCondSoftMaxDistribFun*>(SoftMaxFactor->GetDistribFun())->CalculateLikelihood(evid, ncases);
            }
            for (int k = 0; k < SoftMaxFactor->GetDomainSize(); k++)
            {
                delete [] evid[k];
            }
            delete [] evid;
            int i;
            intVector obsNodes;
            m_Vector_pEvidences[0]->GetAllObsNodes(&obsNodes);
            for( i = 0; i < int(obsNodes.size()); i++ )
            {
                delete [] full_evid[i];
            }
            delete [] full_evid;
        }
        break;
    case PreAs :
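        // PreAs (presumably prequential/predictive assessment): feed the cases one
        // at a time and accumulate a predictive score case by case from the
        // statistics gathered so far.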
        int i;
        NodeType = iCPD->GetDistributionType();
        switch (NodeType)
        {
        case dtTabular :
            for( i = 0; i < ncases; i++ )
            {
                pConstEvidenceVector tempEv(0);
                tempEv.push_back(m_Vector_pEvidences[i]);
                iCPD->UpdateStatisticsML(&tempEv.front(), tempEv.size());
                iCPD->ProcessingStatisticalData(tempEv.size());
                pred += log(static_cast<CTabularCPD*>(iCPD)->GetMatrixValue(m_Vector_pEvidences[i]));
            }
            break;
        case dtGaussian :
            for( i = 0; i < ncases; i++ )
            {
                pConstEvidenceVector tempEv(0);
                tempEv.push_back(m_Vector_pEvidences[i]);
                iCPD->UpdateStatisticsML(&tempEv.front(), tempEv.size());
                float tmp = 0;
                if (i != 0)
                {
                    tmp = iCPD->ProcessingStatisticalData(1);
                    pred += tmp;
                }
            }
            break;
        case dtSoftMax:
            PNL_THROW(CNotImplemented,
                      "This type of score method has not been implemented yet");
            break;
        default:
            PNL_THROW(CNotImplemented,
                      "This type of score method has not been implemented yet");
            break;
        }

        score = pred;
        break;
    case MarLh :
    {
        // check that the potential is discrete (tabular)
        if (iCPD->GetDistributionType() != dtTabular)
        {
            PNL_THROW(CNotImplemented,
                      "This type of score method has been implemented only for discrete nets");
        }

        int DomainSize;
        const int * domain;
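        // All three priors share the same scheme: initialise pseudo counts, fold in
        // every evidence case with BayesUpdateFactor, then compute the Bayesian
        // score; K2 starts from m_K2alfa, and BDeu additionally divides the score
        // by the number of free parameters.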
        switch(m_priorType)
        {
        case Dirichlet:
            iCPD->GetDomain(&DomainSize, &domain);

            pDistribFun = static_cast<CTabularDistribFun *>(iCPD->GetDistribFun());

            pDistribFun->InitPseudoCounts();

            for (i=0; i<ncases; i++)
            {
                pEv = m_Vector_pEvidences[i];
                const CEvidence *pEvidences[] = { pEv };
                pDistribFun->BayesUpdateFactor(pEvidences, 1, domain);
            }
            score = pDistribFun->CalculateBayesianScore();
            break;
        case K2:
            iCPD->GetDomain(&DomainSize, &domain);

            pDistribFun = static_cast<CTabularDistribFun *>(iCPD->GetDistribFun());

            pDistribFun->InitPseudoCounts(m_K2alfa);

            for (i=0; i<ncases; i++)
            {
                pEv = m_Vector_pEvidences[i];
                const CEvidence *pEvidences[] = { pEv };
                pDistribFun->BayesUpdateFactor(pEvidences, 1, domain);
            }
            score = pDistribFun->CalculateBayesianScore();
            break;
        case BDeu:
            iCPD->GetDomain(&DomainSize, &domain);

            pDistribFun = static_cast<CTabularDistribFun *>(iCPD->GetDistribFun());

            pDistribFun->InitPseudoCounts();

            for (i=0; i<ncases; i++)
            {
                pEv = m_Vector_pEvidences[i];
                const CEvidence *pEvidences[] = { pEv };
                pDistribFun->BayesUpdateFactor(pEvidences, 1, domain);
            }
            score = pDistribFun->CalculateBayesianScore() / iCPD->GetNumberOfFreeParameters();
            break;
        default:
            PNL_THROW(CNotImplemented,
                      "This type of prior has not been implemented yet");
            break;
        }


        break;
    }
    default :
        PNL_THROW(CNotImplemented,
                  "This type score method has not been implemented yet");
        break;
    }


    int dim = iCPD->GetNumberOfFreeParameters();
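    // Complexity penalty: BIC subtracts 0.5*dim*ln(ncases), AIC subtracts 0.5*dim,
    // WithoutFine applies no penalty (dim is the number of free CPD parameters).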
    switch (m_ScoreType)
    {
    case BIC :
        score -= 0.5f * float(dim) * float(log(float(ncases)));
        break;
    case AIC :
        score -= 0.5f * float(dim);
        break;
    case WithoutFine:
        break;
    case VAR :
        PNL_THROW(CNotImplemented,
                  "This type score function has not been implemented yet");
        break;
    default:
        PNL_THROW(CNotImplemented,
                  "This type score function has not been implemented yet");
        break;
    }

    delete iCPD;
    return score;
}
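// A minimal calling sketch (hypothetical: assumes a learner "pLearn" of type
// CMlStaticStructLearn whose model and evidences are already set up, and that
// nodes 0 and 1 exist; the child node must be the last entry of the family):
//
//   intVector vFamily;
//   vFamily.push_back( 0 );   // parent node
//   vFamily.push_back( 1 );   // child node (last element)
//   float score = pLearn->ComputeFamilyScore( vFamily );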