// Writes the first `size` keys of populateFrom into randInds[0..size-1] in a
// random order, each key exactly once.
//
// r            - GSL random number generator used for the uniform draws.
// randInds     - caller-allocated output array; must hold at least `size` ints.
// populateFrom - map whose keys (in iteration order) are the values to shuffle.
// size         - number of keys to take from populateFrom and to emit.
//
// Returns 0 on success (including size<=0, which writes nothing) and -1 when
// size exceeds populateFrom.size(); randInds is left untouched in that case.
int EvidenceManager::populateRandIntegers(gsl_rng* r, int* randInds, INTINTMAP& populateFrom, int size)
{
	if(size<=0)
	{
		return 0;
	}
	// Copying `size` keys out of a shorter map would advance the iterator past
	// end() and dereference it.
	if(size>(int)populateFrom.size())
	{
		return -1;
	}
	double step=1.0/(double)size;
	// Position -> key lookup for the first `size` keys.
	map<int,int> keyAt;
	INTINTMAP_ITER tIter=populateFrom.begin();
	for(int i=0;i<size;i++)
	{
		keyAt[i]=tIter->first;
		tIter++;
	}
	// Rejection sampling: redraw until an unused position comes up.
	map<int,int> usedInit;
	for(int i=0;i<size;i++)
	{
		int rind=0;
		do
		{
			double rVal=gsl_ran_flat(r,0,1);
			rind=(int)(rVal/step);
			// Defensive: floating-point rounding must never yield position `size`,
			// which would default-insert a bogus 0 into keyAt.
			if(rind>=size)
			{
				rind=size-1;
			}
		} while(usedInit.find(rind)!=usedInit.end());
		usedInit[rind]=0;
		randInds[i]=keyAt[rind];
	}
	return 0;
}
// Adds `subsetsize` distinct random integers from [0,size) as keys of randInds
// (each mapped to 0). Keys already present in randInds are never drawn again.
//
// r          - GSL random number generator used for the uniform draws.
// randInds   - in/out set of chosen indices, stored as map keys.
// size       - exclusive upper bound of the index range.
// subsetsize - number of new indices to add.
//
// Returns 0 on success (including subsetsize<=0, which adds nothing) and -1
// when fewer than `subsetsize` unused indices remain in [0,size); randInds is
// left untouched in that case.
int EvidenceManager::populateRandIntegers(gsl_rng* r, INTINTMAP& randInds,int size, int subsetsize)
{
	if(subsetsize<=0)
	{
		return 0;
	}
	// Count the indices of [0,size) that are already taken; the rejection loop
	// below would spin forever if it were asked for more than the remainder.
	int taken=0;
	for(INTINTMAP_ITER rIter=randInds.begin();rIter!=randInds.end();rIter++)
	{
		if((rIter->first>=0) && (rIter->first<size))
		{
			taken++;
		}
	}
	if((size<=0) || (subsetsize>size-taken))
	{
		return -1;
	}
	double step=1.0/(double)size;
	for(int i=0;i<subsetsize;i++)
	{
		int rind=0;
		do
		{
			double rVal=gsl_ran_flat(r,0,1);
			rind=(int)(rVal/step);
			// Defensive: keep the index inside [0,size) even if rounding pushes
			// the quotient up to `size`.
			if(rind>=size)
			{
				rind=size-1;
			}
		} while(randInds.find(rind)!=randInds.end());
		randInds[rind]=0;
	}
	return 0;
}
int PotentialManager::estimateAllMeanCov(bool random, INTDBLMAP& gMean, map<int,INTDBLMAP*>& gCovar,INTINTMAP& trainEvidSet) { int evidCnt=trainEvidSet.size(); //First get the mean and then the variance int dId=0; for(INTINTMAP_ITER eIter=trainEvidSet.begin();eIter!=trainEvidSet.end();eIter++) { EMAP* evidMap=NULL; if(random) { evidMap=evMgr->getRandomEvidenceAt(eIter->first); } else { evidMap=evMgr->getEvidenceAt(eIter->first); } for(EMAP_ITER vIter=evidMap->begin();vIter!=evidMap->end(); vIter++) { int vId=vIter->first; Evidence* evid=vIter->second; double val=evid->getEvidVal(); if(gMean.find(vId)==gMean.end()) { gMean[vId]=val; } else { gMean[vId]=gMean[vId]+val; } } dId++; } //Now estimate the mean for(INTDBLMAP_ITER idIter=gMean.begin();idIter!=gMean.end();idIter++) { idIter->second=idIter->second/(double) evidCnt; INTDBLMAP* vcov=new INTDBLMAP; gCovar[idIter->first]=vcov; } return 0; }
int Potential::initMBCovMean() { int vId=factorVariables.begin()->first; int vIdmId=vIDMatIndMap[vId]; mbcondVar=covariance->getValue(vIdmId,vIdmId); mbcondMean_Part=meanWrap[vId]; if(markovBlnktVariables.size()==0) { return 0; } Matrix* mbcov=new Matrix(markovBlnktVariables.size(),markovBlnktVariables.size()); Matrix* mbmargvar=new Matrix(1,markovBlnktVariables.size()); INTINTMAP localMatIDMap; for(INTDBLMAP_ITER uIter=meanWrap.begin();uIter!=meanWrap.end();uIter++) { int i=vIDMatIndMap[uIter->first]; int inew=i; if(i>vIdmId) { inew--; } for(INTDBLMAP_ITER vIter=meanWrap.begin();vIter!=meanWrap.end();vIter++) { if(vIter->first==vId) { continue; } int j=vIDMatIndMap[vIter->first]; double cv=covariance->getValue(i,j); if(j>vIdmId) { j--; } if(uIter->first==vId) { mbmargvar->setValue(cv,0,j); } else { mbcov->setValue(cv,inew,j); } } if(uIter->first!=vId) { localMatIDMap[inew]=uIter->first; } } Matrix* covInv=mbcov->invMatrix(); Matrix* prod1=mbmargvar->multiplyMatrix(covInv); for(INTINTMAP_ITER aIter=localMatIDMap.begin();aIter!=localMatIDMap.end();aIter++) { double aVal=prod1->getValue(0,aIter->first); double bVal=mbmargvar->getValue(0,aIter->first); mbcondVar=mbcondVar-(aVal*bVal); mbcondMean_Vect[aIter->second]=aVal; double cVal=meanWrap[aIter->second]; mbcondMean_Part=mbcondMean_Part-(cVal*aVal); } if(mbcondVar<1e-5) { mbcondVar=1e-5; } localMatIDMap.clear(); delete mbcov; delete mbmargvar; delete covInv; delete prod1; return 0; }
int PotentialManager::estimateAllMeanCov(bool random, INTDBLMAP& gMean, map<int,INTDBLMAP*>& gCovar,INTINTMAP& trainEvidSet, const char* mFName, const char* sdFName,int leaveOutData) { ofstream mFile; ofstream sdFile; if(!random) { if((mFName!=NULL) && (sdFName!=NULL)) { mFile.open(mFName); sdFile.open(sdFName); } } int evidCnt=trainEvidSet.size(); if(leaveOutData!=-1) { evidCnt=evidCnt-1; } //First get the mean and then the variance int dId=0; for(INTINTMAP_ITER eIter=trainEvidSet.begin();eIter!=trainEvidSet.end();eIter++) { if(dId==leaveOutData) { dId++; continue; } EMAP* evidMap=NULL; if(random) { evidMap=evMgr->getRandomEvidenceAt(eIter->first); } else { evidMap=evMgr->getEvidenceAt(eIter->first); } for(EMAP_ITER vIter=evidMap->begin();vIter!=evidMap->end(); vIter++) { int vId=vIter->first; Evidence* evid=vIter->second; double val=evid->getEvidVal(); if(gMean.find(vId)==gMean.end()) { gMean[vId]=val; } else { gMean[vId]=gMean[vId]+val; } } dId++; } //Now estimate the mean for(INTDBLMAP_ITER idIter=gMean.begin();idIter!=gMean.end();idIter++) { if(idIter->first==176) { //cout <<"Stop here: Variable " << idIter->first << " mean " << idIter->second << endl; } idIter->second=idIter->second/(double) evidCnt; if(!random) { if(mFile.good()) { mFile<<idIter->first<<"\t" << idIter->second<< endl; } } } int covPair=0; //Now the variance for(INTINTMAP_ITER eIter=trainEvidSet.begin();eIter!=trainEvidSet.end();eIter++) { EMAP* evidMap=NULL; if(random) { evidMap=evMgr->getRandomEvidenceAt(eIter->first); } else { evidMap=evMgr->getEvidenceAt(eIter->first); } for(EMAP_ITER vIter=evidMap->begin();vIter!=evidMap->end(); vIter++) { int vId=vIter->first; Evidence* evid=vIter->second; double vval=evid->getEvidVal(); double vmean=gMean[vId]; INTDBLMAP* vcov=NULL; if(gCovar.find(vId)==gCovar.end()) { vcov=new INTDBLMAP; gCovar[vId]=vcov; } else { vcov=gCovar[vId]; } for(EMAP_ITER uIter=vIter;uIter!=evidMap->end();uIter++) { int uId=uIter->first; //Don't compute covariance of vId uId 
pairs that both are not in the restrictedNeighborSet, when //the restrictedNeighborSet is empty /* if((!random) && (vId!=uId) && (restrictedNeighborSet.size()>0)) { if((restrictedNeighborSet.find(vId)==restrictedNeighborSet.end()) && (restrictedNeighborSet.find(uId)==restrictedNeighborSet.end())) { continue; } }*/ Evidence* evid1=uIter->second; double uval=evid1->getEvidVal(); double umean=gMean[uId]; double diffprod=(vval-vmean)*(uval-umean); INTDBLMAP* ucov=NULL; if(gCovar.find(uId)==gCovar.end()) { ucov=new INTDBLMAP; gCovar[uId]=ucov; } else { ucov=gCovar[uId]; } if(vcov->find(uId)==vcov->end()) { covPair++; (*vcov)[uId]=diffprod; } else { (*vcov)[uId]=(*vcov)[uId]+diffprod; } if(uId!=vId) { if(ucov->find(vId)==ucov->end()) { (*ucov)[vId]=diffprod; } else { (*ucov)[vId]=(*ucov)[vId]+diffprod; } } } } } cout <<"Total covariance pairs estimated " << covPair << endl; //Now estimate the variance for(map<int,INTDBLMAP*>::iterator idIter=gCovar.begin();idIter!=gCovar.end();idIter++) { INTDBLMAP* var=idIter->second; for(INTDBLMAP_ITER vIter=var->begin();vIter!=var->end();vIter++) { if(vIter->first==idIter->first) { //vIter->second=2*vIter->second/((double)(gCovar.size()-1)); //vIter->second=2*vIter->second/((double)(evidCnt-1)); //vIter->second=(0.001+vIter->second)/((double)(evidCnt-1)); vIter->second=(vIter->second)/((double)(evidCnt-1)); double variance=vIter->second; if(idIter->first==176) { // cout <<"Stop here: Variable " << idIter->first << " variance " << idIter->second << endl; } } else { vIter->second=vIter->second/((double)(evidCnt-1)); //vIter->second=vIter->second/((double)(gCovar.size()-1)); //vIter->second=0; } if(!random) { if(sdFile.good()) { sdFile<<idIter->first<<"\t" << vIter->first <<"\t" << vIter->second << endl; } } } } if(!random) { if(mFile.good()) { mFile.close(); } if(sdFile.good()) { sdFile.close(); } } return 0; }
double PotentialManager::getConditionalEntropy(int vId,INTINTMAP& fVars,VSET& varSet) { double condEntropy=0; //string fullConfStr; //string partConfStr; char confStr[CONSTR_LEN]; /*int varCnt=0; for(INTINTMAP_ITER aIter=fVars.begin();aIter!=fVars.end();aIter++) { sprintf(confStr,"-%d",aIter->first); fullConfStr.append(confStr); if(aIter->first!=vId) { partConfStr.append(confStr); varCnt++; } }*/ double fullJointEntropy=0; /*if(jointEntropies.find(fullConfStr)!=jointEntropies.end()) { fullJointEntropy=jointEntropies[fullConfStr]; } else {*/ Potential* potFunc=new Potential; for(INTINTMAP_ITER aIter=fVars.begin();aIter!=fVars.end();aIter++) { if(aIter==fVars.begin()) { potFunc->setAssocVariable(varSet[aIter->first],Potential::FACTOR); } else { potFunc->setAssocVariable(varSet[aIter->first],Potential::MARKOV_BNKT); } } potFunc->potZeroInit(); populatePotential(potFunc,false); potFunc->calculateJointEntropy(); fullJointEntropy=potFunc->getJointEntropy(); //jointEntropies[fullConfStr]=fullJointEntropy; delete potFunc; //} if(fVars.size()==1) { /*if(jointEntropies.size()>=20000) { jointEntropies.clear(); }*/ return fullJointEntropy; } double partJointEntropy=0; /*if(jointEntropies.find(partConfStr)!=jointEntropies.end()) { partJointEntropy=jointEntropies[partConfStr]; } else { Potential* potFunc=new Potential;*/ potFunc=new Potential; bool setFactorVar=false; for(INTINTMAP_ITER aIter=fVars.begin();aIter!=fVars.end();aIter++) { if(aIter->first==vId) { continue; } if(!setFactorVar) { setFactorVar=true; potFunc->setAssocVariable(varSet[aIter->first],Potential::FACTOR); } else { potFunc->setAssocVariable(varSet[aIter->first],Potential::MARKOV_BNKT); } } STRDBLMAP counts; potFunc->potZeroInit(); populatePotential(potFunc,false); potFunc->calculateJointEntropy(); partJointEntropy=potFunc->getJointEntropy(); //jointEntropies[partConfStr]=partJointEntropy; delete potFunc; //} condEntropy=fullJointEntropy-partJointEntropy; //fullConfStr.clear(); //partConfStr.clear(); 
/*if(jointEntropies.size()>=20000) { jointEntropies.clear(); }*/ return condEntropy; }
double PotentialManager::getGaussianLikelihood(map<int,SlimFactor*>& factorSet,VSET& varSet, bool train) { Potential* aPotFunc=new Potential; for(map<int,SlimFactor*>::iterator fIter=factorSet.begin();fIter!=factorSet.end();fIter++) { Variable* aVar=varSet[fIter->first]; if(fIter==factorSet.begin()) { aPotFunc->setAssocVariable(aVar,Potential::FACTOR); } else { aPotFunc->setAssocVariable(aVar,Potential::MARKOV_BNKT); } } aPotFunc->potZeroInit(); //Use the graph structure to update the particular elements of the covariance and mean of this potential for(map<int,SlimFactor*>::iterator fIter=factorSet.begin();fIter!=factorSet.end();fIter++) { SlimFactor* sFactor=fIter->second; double mean=0; double cov=0; INTDBLMAP* covar=NULL; if(globalMean.find(fIter->first)==globalMean.end()) { cout <<"No var with id " << fIter->first << endl; exit(0); } mean=globalMean[fIter->first]; covar=globalCovar[fIter->first]; aPotFunc->updateMean(fIter->first,mean); double vval=(*covar)[fIter->first]; aPotFunc->updateCovariance(fIter->first,fIter->first,vval); for(INTINTMAP_ITER mIter=sFactor->mergedMB.begin();mIter!=sFactor->mergedMB.end();mIter++) { if(covar->find(mIter->first)==covar->end()) { cout <<"No var " << mIter->first << " in covariance of " << fIter->first << endl; exit(0); } double cval=(*covar)[mIter->first]; aPotFunc->updateCovariance(fIter->first,mIter->first,cval); aPotFunc->updateCovariance(mIter->first,fIter->first,cval); } } aPotFunc->makeValidJPD(); INTDBLMAP datapt; double gll=0; int thresholded=0; INTINTMAP* dataSet; if(train) { dataSet=(&evMgr->getTrainingSet()); } else { dataSet=(&evMgr->getTestSet()); } for(INTINTMAP_ITER dIter=dataSet->begin();dIter!=dataSet->end();dIter++) { EMAP* evidMap=NULL; evidMap=evMgr->getEvidenceAt(dIter->first); for(map<int,SlimFactor*>::iterator fIter=factorSet.begin();fIter!=factorSet.end();fIter++) { int vId=fIter->first; Evidence* evid=(*evidMap)[vId]; double val=evid->getEvidVal(); datapt[vId]=val; } double 
jll=aPotFunc->getJointPotValueFor(datapt); if(jll<1e-50) { jll=1e-50; thresholded++; } gll=gll+log(jll); } delete aPotFunc; return gll; }