// Evaluates the conditional Gaussian density of the factor variable (the first
// entry of factorVariables) at the value it takes in `assignment`.
//
// The mean is mbcondMean_Part plus, for every entry of mbcondMean_Vect, the
// stored weight times the value assigned to that variable. A variable listed in
// mbcondMean_Vect but absent from `assignment` triggers a warning on stdout and
// contributes nothing to the mean. The variance is mbcondVar.
//
// Terminates the process with exit(-1) when the factor variable itself has no
// entry in `assignment`.
double Potential::getCondPotValueFor(INTDBLMAP& assignment)
{
    INTDBLMAP_ITER selfIter = assignment.find(factorVariables.begin()->first);
    if (selfIter == assignment.end())
    {
        cerr <<"Fatal error! No variable assignment for " << factorVariables.begin()->first << endl;
        exit(-1);
    }

    // Weighted sum over the conditioning variables.
    double condMean = 0;
    INTDBLMAP_ITER wIter = mbcondMean_Vect.begin();
    while (wIter != mbcondMean_Vect.end())
    {
        INTDBLMAP_ITER valIter = assignment.find(wIter->first);
        if (valIter == assignment.end())
        {
            cout <<"Warning! No variable assignment for " << wIter->first << endl;
        }
        else
        {
            condMean = condMean + (valIter->second * wIter->second);
        }
        ++wIter;
    }
    condMean = condMean + mbcondMean_Part;

    // Normalising constant sqrt(2*PI*variance).
    const double norm = sqrt(2*PI*mbcondVar);

    const double x = selfIter->second;
    const double scaledSqDev = ((x - condMean) * (x - condMean)) / (2*mbcondVar);
    return exp(-1.0 * scaledSqDev) / norm;
}
double PotentialManager::getPseudoLikelihood(SlimFactor* sFactor,VSET& varSet, bool train,int& status, Potential* aPotFunc) { status=0; Variable* aVar=varSet[sFactor->fId]; aPotFunc->setAssocVariable(aVar,Potential::FACTOR); for(INTINTMAP_ITER aIter=sFactor->mergedMB.begin();aIter!=sFactor->mergedMB.end();aIter++) { Variable* aVar=varSet[aIter->first]; aPotFunc->setAssocVariable(aVar,Potential::MARKOV_BNKT); } aPotFunc->potZeroInit(); populatePotential(aPotFunc,false); //This function creates a submatrix of the covariance matrix and inverts it aPotFunc->initMBCovMean(); if(aPotFunc->getCondVariance()<0) { status=-1; return 0; } INTINTMAP* dataSet=NULL; if(train) { dataSet=&(evMgr->getTrainingSet()); } else { dataSet=&(evMgr->getTestSet()); } INTDBLMAP subData; double pll=0; int thresholded=0; for(INTINTMAP_ITER dIter=dataSet->begin();dIter!=dataSet->end();dIter++) { EMAP* evidMap=NULL; evidMap=evMgr->getEvidenceAt(dIter->first); Evidence* evid=(*evidMap)[sFactor->fId]; double val=evid->getEvidVal(); subData[sFactor->fId]=val; for(INTINTMAP_ITER vIter=sFactor->mergedMB.begin();vIter!=sFactor->mergedMB.end(); vIter++) { int vId=vIter->first; Evidence* evid=(*evidMap)[vIter->first]; double val=evid->getEvidVal(); subData[vId]=val; } double cll=aPotFunc->getCondPotValueFor(subData); if(cll<1e-50) { cll=1e-50; thresholded++; } pll=pll+log(cll); } subData.clear(); return pll; }
// Copies every (variable id, weight) pair of wt into mbcondMean_Vect,
// overwriting weights already stored under the same id. Entries of
// mbcondMean_Vect whose id is not in wt are left untouched. Always returns 0.
int Potential::setCondWeight(INTDBLMAP& wt)
{
    INTDBLMAP_ITER wIter = wt.begin();
    while (wIter != wt.end())
    {
        mbcondMean_Vect[wIter->first] = wIter->second;
        ++wIter;
    }
    return 0;
}
int PotentialManager::estimateAllMeanCov(bool random, INTDBLMAP& gMean, map<int,INTDBLMAP*>& gCovar,INTINTMAP& trainEvidSet) { int evidCnt=trainEvidSet.size(); //First get the mean and then the variance int dId=0; for(INTINTMAP_ITER eIter=trainEvidSet.begin();eIter!=trainEvidSet.end();eIter++) { EMAP* evidMap=NULL; if(random) { evidMap=evMgr->getRandomEvidenceAt(eIter->first); } else { evidMap=evMgr->getEvidenceAt(eIter->first); } for(EMAP_ITER vIter=evidMap->begin();vIter!=evidMap->end(); vIter++) { int vId=vIter->first; Evidence* evid=vIter->second; double val=evid->getEvidVal(); if(gMean.find(vId)==gMean.end()) { gMean[vId]=val; } else { gMean[vId]=gMean[vId]+val; } } dId++; } //Now estimate the mean for(INTDBLMAP_ITER idIter=gMean.begin();idIter!=gMean.end();idIter++) { idIter->second=idIter->second/(double) evidCnt; INTDBLMAP* vcov=new INTDBLMAP; gCovar[idIter->first]=vcov; } return 0; }
// Returns the conditional mean of the factor variable given the values in
// jointConf: the sum over mbcondMean_Vect of (weight * assigned value), plus
// mbcondMean_Part.
//
// Exits the process with status -1 when jointConf lacks the factor variable
// (first entry of factorVariables) or any variable listed in mbcondMean_Vect.
// The vId argument is not used.
double Potential::predictSample(INTDBLMAP& jointConf, int vId)
{
    if (jointConf.find(factorVariables.begin()->first) == jointConf.end())
    {
        cerr <<"Fatal error! No variable assignment for " << factorVariables.begin()->first << endl;
        exit(-1);
    }

    double condMean = 0;
    INTDBLMAP_ITER wIter = mbcondMean_Vect.begin();
    while (wIter != mbcondMean_Vect.end())
    {
        INTDBLMAP_ITER valIter = jointConf.find(wIter->first);
        if (valIter == jointConf.end())
        {
            cerr <<"Fatal error! No variable assignment for " << wIter->first << endl;
            exit(-1);
        }
        condMean = condMean + (valIter->second * wIter->second);
        ++wIter;
    }
    return condMean + mbcondMean_Part;
}
// Draws a value for the factor variable: the conditional mean given jointConf
// (sum over mbcondMean_Vect of weight * assigned value, plus mbcondMean_Part)
// plus a gsl_ran_gaussian() draw from r whose sigma argument is sqrt(gVar).
//
// Exits the process with status -1 when jointConf lacks the factor variable
// (first entry of factorVariables) or any variable listed in mbcondMean_Vect.
// The vId argument is not used.
double Potential::generateSample(INTDBLMAP& jointConf, int vId,gsl_rng* r,double gVar)
{
    if (jointConf.find(factorVariables.begin()->first) == jointConf.end())
    {
        cerr <<"Fatal error! No variable assignment for " << factorVariables.begin()->first << endl;
        exit(-1);
    }

    double condMean = 0;
    INTDBLMAP_ITER wIter = mbcondMean_Vect.begin();
    while (wIter != mbcondMean_Vect.end())
    {
        INTDBLMAP_ITER valIter = jointConf.find(wIter->first);
        if (valIter == jointConf.end())
        {
            cerr <<"Fatal error! No variable assignment for " << wIter->first << endl;
            exit(-1);
        }
        condMean = condMean + (valIter->second * wIter->second);
        ++wIter;
    }
    condMean = condMean + mbcondMean_Part;

    // Zero-mean noise shifted onto the conditional mean.
    const double noise = gsl_ran_gaussian(r, sqrt(gVar));
    return noise + condMean;
}
//Get the joint prob value for a particular configuration double Potential::getJointPotValueFor(INTDBLMAP& varConf) { string aKey; double pVal=0; Matrix* valMat=new Matrix(varSet.size(),1); for(INTDBLMAP_ITER idIter=varConf.begin();idIter!=varConf.end();idIter++) { int i=vIDMatIndMap[idIter->first]; valMat->setValue(idIter->second,i,0); } Matrix* meanDiff=valMat->subtractMatrix(mean); Matrix* diffT=meanDiff->transMatrix(); Matrix* p1=diffT->multiplyMatrix(inverse); Matrix* p2=p1->multiplyMatrix(meanDiff); double prod=p2->getValue(0,0); pVal=exp(-0.5*prod); pVal=pVal/normFactor; delete meanDiff; delete diffT; delete p1; delete p2; return pVal; }
int PotentialManager::estimateAllMeanCov(bool random, INTDBLMAP& gMean, map<int,INTDBLMAP*>& gCovar,INTINTMAP& trainEvidSet, const char* mFName, const char* sdFName,int leaveOutData) { ofstream mFile; ofstream sdFile; if(!random) { if((mFName!=NULL) && (sdFName!=NULL)) { mFile.open(mFName); sdFile.open(sdFName); } } int evidCnt=trainEvidSet.size(); if(leaveOutData!=-1) { evidCnt=evidCnt-1; } //First get the mean and then the variance int dId=0; for(INTINTMAP_ITER eIter=trainEvidSet.begin();eIter!=trainEvidSet.end();eIter++) { if(dId==leaveOutData) { dId++; continue; } EMAP* evidMap=NULL; if(random) { evidMap=evMgr->getRandomEvidenceAt(eIter->first); } else { evidMap=evMgr->getEvidenceAt(eIter->first); } for(EMAP_ITER vIter=evidMap->begin();vIter!=evidMap->end(); vIter++) { int vId=vIter->first; Evidence* evid=vIter->second; double val=evid->getEvidVal(); if(gMean.find(vId)==gMean.end()) { gMean[vId]=val; } else { gMean[vId]=gMean[vId]+val; } } dId++; } //Now estimate the mean for(INTDBLMAP_ITER idIter=gMean.begin();idIter!=gMean.end();idIter++) { if(idIter->first==176) { //cout <<"Stop here: Variable " << idIter->first << " mean " << idIter->second << endl; } idIter->second=idIter->second/(double) evidCnt; if(!random) { if(mFile.good()) { mFile<<idIter->first<<"\t" << idIter->second<< endl; } } } int covPair=0; //Now the variance for(INTINTMAP_ITER eIter=trainEvidSet.begin();eIter!=trainEvidSet.end();eIter++) { EMAP* evidMap=NULL; if(random) { evidMap=evMgr->getRandomEvidenceAt(eIter->first); } else { evidMap=evMgr->getEvidenceAt(eIter->first); } for(EMAP_ITER vIter=evidMap->begin();vIter!=evidMap->end(); vIter++) { int vId=vIter->first; Evidence* evid=vIter->second; double vval=evid->getEvidVal(); double vmean=gMean[vId]; INTDBLMAP* vcov=NULL; if(gCovar.find(vId)==gCovar.end()) { vcov=new INTDBLMAP; gCovar[vId]=vcov; } else { vcov=gCovar[vId]; } for(EMAP_ITER uIter=vIter;uIter!=evidMap->end();uIter++) { int uId=uIter->first; //Don't compute covariance of vId uId 
pairs that both are not in the restrictedNeighborSet, when //the restrictedNeighborSet is empty /* if((!random) && (vId!=uId) && (restrictedNeighborSet.size()>0)) { if((restrictedNeighborSet.find(vId)==restrictedNeighborSet.end()) && (restrictedNeighborSet.find(uId)==restrictedNeighborSet.end())) { continue; } }*/ Evidence* evid1=uIter->second; double uval=evid1->getEvidVal(); double umean=gMean[uId]; double diffprod=(vval-vmean)*(uval-umean); INTDBLMAP* ucov=NULL; if(gCovar.find(uId)==gCovar.end()) { ucov=new INTDBLMAP; gCovar[uId]=ucov; } else { ucov=gCovar[uId]; } if(vcov->find(uId)==vcov->end()) { covPair++; (*vcov)[uId]=diffprod; } else { (*vcov)[uId]=(*vcov)[uId]+diffprod; } if(uId!=vId) { if(ucov->find(vId)==ucov->end()) { (*ucov)[vId]=diffprod; } else { (*ucov)[vId]=(*ucov)[vId]+diffprod; } } } } } cout <<"Total covariance pairs estimated " << covPair << endl; //Now estimate the variance for(map<int,INTDBLMAP*>::iterator idIter=gCovar.begin();idIter!=gCovar.end();idIter++) { INTDBLMAP* var=idIter->second; for(INTDBLMAP_ITER vIter=var->begin();vIter!=var->end();vIter++) { if(vIter->first==idIter->first) { //vIter->second=2*vIter->second/((double)(gCovar.size()-1)); //vIter->second=2*vIter->second/((double)(evidCnt-1)); //vIter->second=(0.001+vIter->second)/((double)(evidCnt-1)); vIter->second=(vIter->second)/((double)(evidCnt-1)); double variance=vIter->second; if(idIter->first==176) { // cout <<"Stop here: Variable " << idIter->first << " variance " << idIter->second << endl; } } else { vIter->second=vIter->second/((double)(evidCnt-1)); //vIter->second=vIter->second/((double)(gCovar.size()-1)); //vIter->second=0; } if(!random) { if(sdFile.good()) { sdFile<<idIter->first<<"\t" << vIter->first <<"\t" << vIter->second << endl; } } } } if(!random) { if(mFile.good()) { mFile.close(); } if(sdFile.good()) { sdFile.close(); } } return 0; }
// Parses one evidence string into a newly allocated Evidence object.
//
// The string is split on '=', ',' and ']'; '[' characters are ignored. The
// first token is the variable id. Every later token is either "value" (hard
// evidence, stored with weight 1.0) or "value|prob" (soft evidence). Each
// value must already be a key of the map filled in by
// Variable::initEvidence(); otherwise parsing fails. Characters after the
// last delimiter are discarded.
//
// Returns 0 on success with *evid pointing at the new object. Returns -1 when
// the string contains no '=' (*evid is left untouched), or on any later parse
// error, in which case the partially built object is deleted and *evid is set
// to NULL so the caller is not left holding a half-initialised Evidence.
int EvidenceManager::populateEvidence(Evidence** evid,const char* evidStr)
{
    //first check for validity of evidStr
    if(strchr(evidStr,'=')==NULL)
    {
        return -1;
    }
    *evid=new Evidence;
    INTDBLMAP evidData;
    char tempTok[256];
    const int maxTokLen=sizeof(tempTok)-1;
    int currInd=0;
    int ttInd=0;
    int tokId=0;
    bool failed=false;
    while((!failed) && (evidStr[currInd]!='\0'))
    {
        const char ch=evidStr[currInd];
        if((ch=='=') || (ch==']') || (ch==','))
        {
            tempTok[ttInd]='\0';
            ttInd=0;
            if(tokId==0)
            {
                //This is the variable
                int vId=atoi(tempTok);
                Variable* var=vMgr->getVariableAt(vId);
                if(var==NULL)
                {
                    cout <<"No variable with id " << vId << endl;
                    failed=true;
                }
                else
                {
                    var->initEvidence(evidData);
                    (*evid)->assocVariable(vId);
                }
            }
            else
            {
                // "value" is hard evidence, "value|prob" is soft evidence.
                char* pos=strchr(tempTok,'|');
                double varValProb=1.0;
                if(pos!=NULL)
                {
                    *pos='\0';
                    varValProb=atof(pos+1);
                }
                int varVal=atoi(tempTok);
                if(evidData.find(varVal)==evidData.end())
                {
                    cout <<"No value "<< varVal << " in the domain of a variable" << endl;
                    failed=true;
                }
                else
                {
                    evidData[varVal]=varValProb;
                    //Will be setting it multiple times but its ok for now.
                    if(pos==NULL)
                    {
                        (*evid)->setType(Evidence::HARD);
                    }
                    else
                    {
                        (*evid)->setType(Evidence::SOFT);
                    }
                }
            }
            tokId++;
        }
        else if(ch!='[')
        {
            // Refuse tokens that would overrun tempTok instead of writing
            // past the end of the buffer.
            if(ttInd>=maxTokLen)
            {
                cout <<"Evidence token too long in " << evidStr << endl;
                failed=true;
            }
            else
            {
                tempTok[ttInd]=ch;
                ttInd++;
            }
        }
        currInd++;
    }
    if(failed)
    {
        delete *evid;
        *evid=NULL;
        return -1;
    }
    (*evid)->setData(evidData);
    return 0;
}