示例#1
0
void CMlLearningEngine::Learn()
{
/*
function takes an information from m_pEvidences and learns factors
of graphical model using prior probabilities or not
    */
    float logLikTmp = 0;
    if(!m_pGrModel)
    {
        PNL_THROW( CNULLPointer, "no graphical model")
    }
    CStaticGraphicalModel *grmodel = this->GetStaticModel();
    CFactor *parameter = NULL;
    int numberOfDomains = grmodel -> GetNumberOfFactors();
    
    for( int domainNodes = 0; domainNodes < numberOfDomains; domainNodes++ )
    {
        factor = grmodel->GetFactor( domainNodes );
        factor ->UpdateStatisticsML( &m_Vector_pEvidences.front(), 
            m_Vector_pEvidences.size() );
        PNL_CHECK_LEFT_BORDER(m_numberOfAllEvidences, 1);
        logLikTmp += parameter->ProcessingStatisticalData(m_numberOfAllEvidences);
    }
    switch( grmodel -> GetModelType() )
    {
    case mtBNet:
        {
            break;
        }
    case mtMRF2:
    case mtMNet:
        {
            logLikTmp = _LearnPotentials();
            break;
        }
    default:
        {
            PNL_THROW(CBadConst, "model type" )
                break;
        }
    }
    m_critValue.push_back(logLikTmp);
}
void CParEMLearningEngine::Learn()
{
    CStaticGraphicalModel *pGrModel =  this->GetStaticModel();
    PNL_CHECK_IS_NULL_POINTER(pGrModel);
    PNL_CHECK_LEFT_BORDER(GetNumEv() - GetNumberProcEv() , 1);

    CJtreeInfEngine *pCurrentInfEng = NULL;

    CFactor *parameter = NULL;
    int exit = 0;
    int numberOfParameters = pGrModel->GetNumberOfParameters();
    int domainNodes;
    int infIsNeed = 0;
    int itsML = 0;

    // !!!
    float loglik = -FLT_MAX;
    float loglikOld = -FLT_MAX;
    float epsilon = GetPrecisionEM();
    float stopExpression = epsilon + 1.0f;
    int iteration = 0;
    int currentEvidNumber;
    int bMaximize = 0;
    int bSumOnMixtureNode = 0;
    const CEvidence* pCurrentEvid;
    int start_mpi, finish_mpi;
    int NumberOfProcesses, MyRank;
    int numSelfEvidences;
    
    MPI_Comm_size(MPI_COMM_WORLD, &NumberOfProcesses);
    MPI_Comm_rank(MPI_COMM_WORLD, &MyRank);

    int d = 0;
    do
    {
        iteration++;

        numSelfEvidences = (GetNumEv() - GetNumberProcEv()) / NumberOfProcesses;
        start_mpi = GetNumberProcEv() + numSelfEvidences * MyRank; // !!!
        if (MyRank < NumberOfProcesses - 1)
            finish_mpi = start_mpi + numSelfEvidences; // !!!
        else
            finish_mpi = GetNumEv(); // !!!        

        for(int ev = start_mpi; ev < finish_mpi; ev++)
        {
            infIsNeed = 0;
            currentEvidNumber = ev; // !!!

            pCurrentEvid = m_Vector_pEvidences[currentEvidNumber];
            if( !pCurrentEvid)
            {
                PNL_THROW(CNULLPointer, "evidence")
            }

            infIsNeed = !GetObsFlags(ev)->empty(); // !!!

            if(infIsNeed)
            {
                // create inference engine
                if(!pCurrentInfEng)
                {
                    pCurrentInfEng = CJtreeInfEngine::Create(pGrModel);
                }
                pCurrentInfEng->EnterEvidence(pCurrentEvid, bMaximize,
                    bSumOnMixtureNode);
            }

            for(domainNodes = 0; domainNodes < numberOfParameters; domainNodes++)
            {
                parameter = pGrModel->GetFactor(domainNodes);
                if(infIsNeed)
                {
                    int DomainSize;
                    const int *domain;
                    parameter->GetDomain(&DomainSize, &domain);
                    if (IsDomainObserved(DomainSize, domain, currentEvidNumber))
                    {
                        const CEvidence *pEvidences[] = { pCurrentEvid };
                        parameter->UpdateStatisticsML(pEvidences, 1);
                    }
                    else
                    {
                        pCurrentInfEng->MarginalNodes(domain, DomainSize, 1);
                        const CPotential * pMargPot = pCurrentInfEng->GetQueryJPD();
                        parameter ->UpdateStatisticsEM(pMargPot, pCurrentEvid);
                    }
                }
                else
                {
                    const CEvidence *pEvidences[] = { pCurrentEvid };
                    parameter->UpdateStatisticsML(pEvidences, 1);
                }
            }
            itsML = itsML || !infIsNeed;
        }

        for(domainNodes = 0; domainNodes < numberOfParameters; domainNodes++ )
        {
            parameter = pGrModel->GetFactor(domainNodes);
            
            CNumericDenseMatrix<float> *matForSending;
            int matDim;
            const int *pMatRanges;
            int dataLength;
            const float *pDataForSending;

            matForSending = static_cast<CNumericDenseMatrix<float>*>
                ((parameter->GetDistribFun())->GetStatisticalMatrix(stMatTable));

            matForSending->GetRanges(&matDim, &pMatRanges);
            matForSending->GetRawData(&dataLength, &pDataForSending);
            float *pDataRecv = new float[dataLength];
            float *pDataRecv_copy = new float[dataLength];
            MPI_Status status;

            MPI_Allreduce((void*)pDataForSending, pDataRecv, dataLength, MPI_FLOAT, MPI_SUM,
                MPI_COMM_WORLD);

            CNumericDenseMatrix<float> *RecvMatrix =
                static_cast<CNumericDenseMatrix<float>*>
                (parameter->GetDistribFun()->GetStatisticalMatrix(stMatTable));
            int dataLength_new;
            float *pData_new;
            RecvMatrix->GetRawData(&dataLength_new, (const float**)(&pData_new));
            for(int t=0;t<dataLength_new;t++)
                pData_new[t]=pDataRecv[t];
        }
        switch (pGrModel->GetModelType())
        {
        case mtBNet:
            {
                loglikOld = loglik;
                loglik = 0.0f;
                for(domainNodes = 0; domainNodes < numberOfParameters; domainNodes++)
                {
                    parameter = pGrModel->GetFactor(domainNodes);
                    loglik += parameter->ProcessingStatisticalData(m_numberOfAllEvidences);
                }
                break;
            }
        case mtMRF2:
        case mtMNet:
            {
                loglikOld = loglik;
                loglik = _LearnPotentials();
                break;
            }
        default:
            {
                PNL_THROW(CBadConst, "model type")
                    break;
            }
        }

        stopExpression = 
            float(fabs(2 * (loglikOld - loglik) / (loglikOld + loglik)));
        exit = ((stopExpression > epsilon) && (iteration <= GetMaxIterEM())) && !itsML;
        if(exit)
        {
            ClearStatisticData();
        }

        delete pCurrentInfEng;
        pCurrentInfEng = NULL;
    }while(exit);

    if(iteration > GetMaxIterEM())
    {
        PNL_THROW(CNotConverged, "maximum number of iterations")
    }

    SetNumProcEv( GetNumEv() );
}
示例#3
0
void CEMLearningEngine::LearnExtraCPDs(int nMaxFamily, pCPDVector* additionalCPDs, floatVector* additionalLLs)
{

    CStaticGraphicalModel *pGrModel =  this->GetStaticModel();
    PNL_CHECK_IS_NULL_POINTER(pGrModel);
    PNL_CHECK_LEFT_BORDER(GetNumEv(), 1);
    
    int numberOfFactors = pGrModel->GetNumberOfFactors();
    int numberOfAddFactors = additionalCPDs->size();
    
    additionalLLs->resize(numberOfAddFactors);
    additionalLLs->clear();
    
    m_vFamilyLogLik.resize(numberOfFactors);
    float	loglik = 0.0f, ll;
    int		i, ev;
    int iteration = 0;
    const CEvidence* pEv;
    
    CFactor *factor = NULL;
    int nnodes;
    const int * domain;
    
    bool bInfIsNeed;
    CInfEngine *pInfEng = m_pInfEngine;
    
    if (IsAllObserved())
    {
        for (i = 0; i < numberOfFactors; i++)
        {
            factor = pGrModel->GetFactor(i);
            factor->UpdateStatisticsML(&m_Vector_pEvidences[GetNumberProcEv()], 
                GetNumEv() - GetNumberProcEv());
        }
        
        for( ev = 0; ev < GetNumEv() ; ev++)
        {
            pEv = m_Vector_pEvidences[ev];
            for( i = 0; i < numberOfAddFactors; i++ )
            {
                factor = static_cast<CFactor*>((*additionalCPDs)[i]);
                factor->UpdateStatisticsML( &pEv, 1 );
            }
        }
        
        switch (pGrModel->GetModelType())
        {
        case mtBNet:
            {
                for( i = 0; i<numberOfFactors; i++ )
                {
                    factor = pGrModel->GetFactor(i);
                    ll = factor->ProcessingStatisticalData( GetNumEv());
                    m_vFamilyLogLik[i] = ll;
                    loglik += ll;
                }
                
                for( i = 0; i < numberOfAddFactors; i++ )
                {
                    factor = static_cast<CFactor*>((*additionalCPDs)[i]);
                    ll = factor->ProcessingStatisticalData( GetNumEv());
                    (*additionalLLs)[i] = ll;
                }
                break;
            }
        case mtMRF2:
        case mtMNet:
            {	
                break;
            }
        default:
            {
                PNL_THROW(CBadConst, "model type" )
                    break;
            }
        }
        m_critValue.push_back(loglik);    
        
    }
    else
    {
示例#4
0
int testSetStatistics()
{
    int ret = TRS_OK;
    float eps = 0.1f;
    
    int seed = pnlTestRandSeed();
    pnlSeed( seed );   
            
    CBNet *pBNet = pnlExCreateCondGaussArBNet();
    CModelDomain *pMD = pBNet->GetModelDomain();

    
    CGraph *pGraph = CGraph::Copy(pBNet->GetGraph());
    
    CBNet *pBNet1 = CBNet::CreateWithRandomMatrices( pGraph, pMD );

    pEvidencesVector evidences;
    int nEvidences = pnlRand( 3000, 4000);
    
    pBNet->GenerateSamples( &evidences, nEvidences );
   
    
    int i;
    for( i = 0; i < nEvidences; i++)
    {
	
	//evidences[i]->MakeNodeHiddenBySerialNum(0);
    }
    

    CEMLearningEngine *pLearn = CEMLearningEngine::Create(pBNet1);
    pLearn->SetData( nEvidences, &evidences.front() );
    pLearn->SetMaxIterEM();
    pLearn->Learn();

    for( i = 0; i < pBNet->GetNumberOfFactors(); i++ )
    {
	if( ! pBNet->GetFactor(i)->IsFactorsDistribFunEqual(pBNet1->GetFactor(i), eps))
	{
	    ret = TRS_FAIL;
	    pBNet->GetFactor(i)->GetDistribFun()->Dump();
	    pBNet1->GetFactor(i)->GetDistribFun()->Dump();

	}
    }
    
    CDistribFun *pDistr;
    const CMatrix<float>* pMat;
    CFactor *pCPD;
    
    pDistr = pBNet1->GetFactor(0)->GetDistribFun();
    pMat = pDistr->GetStatisticalMatrix(stMatTable);
    
    pCPD = pBNet->GetFactor(0);
    pCPD->SetStatistics(pMat, stMatTable);
    pCPD->ProcessingStatisticalData(nEvidences);
    if( ! pCPD->IsFactorsDistribFunEqual(pBNet1->GetFactor(0), 0.0001f) )
    {
	ret = TRS_FAIL;
    }
    

    pDistr = pBNet1->GetFactor(1)->GetDistribFun();
    
    int parentVal;
    pCPD = pBNet->GetFactor(1);
    
    parentVal = 0;

    pCPD->SetStatistics(pMat, stMatCoeff);

    pMat = pDistr->GetStatisticalMatrix(stMatMu, &parentVal);
    pCPD->SetStatistics(pMat, stMatMu, &parentVal);
    
    
    pMat = pDistr->GetStatisticalMatrix(stMatSigma, &parentVal);
    pCPD->SetStatistics(pMat, stMatSigma, &parentVal);
    
    parentVal = 1;
    
    pMat = pDistr->GetStatisticalMatrix(stMatMu, &parentVal);
    pCPD->SetStatistics(pMat, stMatMu, &parentVal);
    
    
    pMat = pDistr->GetStatisticalMatrix(stMatSigma, &parentVal);
    pCPD->SetStatistics(pMat, stMatSigma, &parentVal);

    pCPD->ProcessingStatisticalData(nEvidences);
    
    if( ! pCPD->IsFactorsDistribFunEqual(pBNet1->GetFactor(1), eps) )
    {
	ret = TRS_FAIL;
    }
    
    
    for( i = 0; i < nEvidences; i++)
    {
	delete evidences[i];
    }
    delete pLearn;
    delete pBNet1;
    delete pBNet;

    
    return trsResult( ret, ret == TRS_OK ? "No errors" : 
    "Bad test on SetStatistics");
    
    
}