Beispiel #1
0
int
PotentialManager::estimateAllMeanCov(bool random, INTDBLMAP& gMean, map<int,INTDBLMAP*>& gCovar,INTINTMAP& trainEvidSet)
{
	int evidCnt=trainEvidSet.size();
	//First get the mean and then the variance
	int dId=0;
	for(INTINTMAP_ITER eIter=trainEvidSet.begin();eIter!=trainEvidSet.end();eIter++)
	{
		EMAP* evidMap=NULL;
		if(random)
		{
			evidMap=evMgr->getRandomEvidenceAt(eIter->first);
		}
		else
		{
			evidMap=evMgr->getEvidenceAt(eIter->first);
		}
		for(EMAP_ITER vIter=evidMap->begin();vIter!=evidMap->end(); vIter++)
		{
			int vId=vIter->first;
			Evidence* evid=vIter->second;
			double val=evid->getEvidVal();
			if(gMean.find(vId)==gMean.end())
			{
				gMean[vId]=val;
			}
			else
			{
				gMean[vId]=gMean[vId]+val;
			}
		}
		dId++;	
	}
	//Now estimate the mean
	for(INTDBLMAP_ITER idIter=gMean.begin();idIter!=gMean.end();idIter++)
	{
		idIter->second=idIter->second/(double) evidCnt;
		INTDBLMAP* vcov=new INTDBLMAP;
		gCovar[idIter->first]=vcov;
	}
	return 0;
}
Beispiel #2
0
int
PotentialManager::estimateAllMeanCov(bool random, INTDBLMAP& gMean, map<int,INTDBLMAP*>& gCovar,INTINTMAP& trainEvidSet, const char* mFName, const char* sdFName,int leaveOutData)
{
	ofstream mFile;
	ofstream sdFile;

	if(!random)
	{
		if((mFName!=NULL) && (sdFName!=NULL))
		{
			mFile.open(mFName);
			sdFile.open(sdFName);
		}
	}

	int evidCnt=trainEvidSet.size();
	if(leaveOutData!=-1)
	{
		evidCnt=evidCnt-1;
	}
	//First get the mean and then the variance
	int dId=0;
	for(INTINTMAP_ITER eIter=trainEvidSet.begin();eIter!=trainEvidSet.end();eIter++)
	{
		if(dId==leaveOutData)
		{	
			dId++;
			continue;
		}
		EMAP* evidMap=NULL;
		if(random)
		{
			evidMap=evMgr->getRandomEvidenceAt(eIter->first);
		}
		else
		{
			evidMap=evMgr->getEvidenceAt(eIter->first);
		}
		for(EMAP_ITER vIter=evidMap->begin();vIter!=evidMap->end(); vIter++)
		{
			int vId=vIter->first;
			Evidence* evid=vIter->second;
			double val=evid->getEvidVal();
			if(gMean.find(vId)==gMean.end())
			{
				gMean[vId]=val;
			}
			else
			{
				gMean[vId]=gMean[vId]+val;
			}
		}
		dId++;	
	}
	//Now estimate the mean
	for(INTDBLMAP_ITER idIter=gMean.begin();idIter!=gMean.end();idIter++)
	{
		if(idIter->first==176)
		{
			//cout <<"Stop here: Variable " << idIter->first << " mean " << idIter->second << endl;
		}
		idIter->second=idIter->second/(double) evidCnt;
		if(!random)
		{
			if(mFile.good())
			{
				mFile<<idIter->first<<"\t" << idIter->second<< endl;
			}
		}
	}
	int covPair=0;
	//Now the variance
	for(INTINTMAP_ITER eIter=trainEvidSet.begin();eIter!=trainEvidSet.end();eIter++)
	{
		EMAP* evidMap=NULL;
		if(random)
		{
			evidMap=evMgr->getRandomEvidenceAt(eIter->first);
		}
		else
		{
			evidMap=evMgr->getEvidenceAt(eIter->first);
		}
		for(EMAP_ITER vIter=evidMap->begin();vIter!=evidMap->end(); vIter++)
		{
			int vId=vIter->first;
			Evidence* evid=vIter->second;
			double vval=evid->getEvidVal();
			double vmean=gMean[vId];
			INTDBLMAP* vcov=NULL;
			if(gCovar.find(vId)==gCovar.end())
			{
				vcov=new INTDBLMAP;
				gCovar[vId]=vcov;
			}
			else
			{
				vcov=gCovar[vId];
			}
			for(EMAP_ITER uIter=vIter;uIter!=evidMap->end();uIter++)
			{
				int uId=uIter->first;
				//Don't compute covariance of vId uId pairs that both are not in the restrictedNeighborSet, when
				//the restrictedNeighborSet is empty
			/*	if((!random) && (vId!=uId) && (restrictedNeighborSet.size()>0))
				{
					if((restrictedNeighborSet.find(vId)==restrictedNeighborSet.end()) && (restrictedNeighborSet.find(uId)==restrictedNeighborSet.end()))
					{
						continue;
					}
				}*/
				Evidence* evid1=uIter->second;
				double uval=evid1->getEvidVal();
				double umean=gMean[uId];
				double diffprod=(vval-vmean)*(uval-umean);
				INTDBLMAP* ucov=NULL;
				if(gCovar.find(uId)==gCovar.end())
				{
					ucov=new INTDBLMAP;
					gCovar[uId]=ucov;
				}
				else
				{
					ucov=gCovar[uId];
				}
				if(vcov->find(uId)==vcov->end())
				{
					covPair++;
					(*vcov)[uId]=diffprod;
				}
				else
				{
					(*vcov)[uId]=(*vcov)[uId]+diffprod;
				}
				if(uId!=vId)
				{
					if(ucov->find(vId)==ucov->end())
					{
						(*ucov)[vId]=diffprod;
					}
					else
					{
						(*ucov)[vId]=(*ucov)[vId]+diffprod;
					}
				}
			}
		}

	}
	cout <<"Total covariance pairs estimated " << covPair << endl;
	//Now estimate the variance
	for(map<int,INTDBLMAP*>::iterator idIter=gCovar.begin();idIter!=gCovar.end();idIter++)
	{
		INTDBLMAP* var=idIter->second;
		for(INTDBLMAP_ITER vIter=var->begin();vIter!=var->end();vIter++)
		{
			if(vIter->first==idIter->first)
			{
				//vIter->second=2*vIter->second/((double)(gCovar.size()-1));
				//vIter->second=2*vIter->second/((double)(evidCnt-1));
				//vIter->second=(0.001+vIter->second)/((double)(evidCnt-1));
				vIter->second=(vIter->second)/((double)(evidCnt-1));
				double variance=vIter->second;
				if(idIter->first==176)
				{
				//	cout <<"Stop here: Variable " << idIter->first << " variance " << idIter->second << endl;
				}
			}
			else
			{
				vIter->second=vIter->second/((double)(evidCnt-1));
				//vIter->second=vIter->second/((double)(gCovar.size()-1));
				//vIter->second=0;
			}
			if(!random)
			{
				if(sdFile.good())
				{
					sdFile<<idIter->first<<"\t" << vIter->first <<"\t" << vIter->second << endl;
				}
			}
		}
	}
	if(!random)
	{
		if(mFile.good())
		{
			mFile.close();
		}
		if(sdFile.good())
		{
			sdFile.close();
		}
	}	
	return 0;
}
Beispiel #3
0
double 
PotentialManager::getConditionalEntropy(int vId,INTINTMAP& fVars,VSET& varSet)
{
	double condEntropy=0;
	//string fullConfStr;
	//string partConfStr;
	char confStr[CONSTR_LEN];
	/*int varCnt=0;
	for(INTINTMAP_ITER aIter=fVars.begin();aIter!=fVars.end();aIter++)
	{
		sprintf(confStr,"-%d",aIter->first);
		fullConfStr.append(confStr);
		if(aIter->first!=vId)
		{
			partConfStr.append(confStr);
			varCnt++;
		}
	}*/
	double fullJointEntropy=0;
	/*if(jointEntropies.find(fullConfStr)!=jointEntropies.end())
	{
		fullJointEntropy=jointEntropies[fullConfStr];
	}
	else
	{*/
		Potential* potFunc=new Potential;
		for(INTINTMAP_ITER aIter=fVars.begin();aIter!=fVars.end();aIter++)
		{
			if(aIter==fVars.begin())
			{
				potFunc->setAssocVariable(varSet[aIter->first],Potential::FACTOR);
			}
			else
			{
				potFunc->setAssocVariable(varSet[aIter->first],Potential::MARKOV_BNKT);
			}
		}
		potFunc->potZeroInit();
		populatePotential(potFunc,false);
		potFunc->calculateJointEntropy();
		fullJointEntropy=potFunc->getJointEntropy();
		//jointEntropies[fullConfStr]=fullJointEntropy;
		delete potFunc;
	//}
	if(fVars.size()==1)
	{
		/*if(jointEntropies.size()>=20000)
		{
			jointEntropies.clear();
		}*/
		return fullJointEntropy;
	}
	double partJointEntropy=0;
	/*if(jointEntropies.find(partConfStr)!=jointEntropies.end())
	{
		partJointEntropy=jointEntropies[partConfStr];
	}
	else
	{
		Potential* potFunc=new Potential;*/
		potFunc=new Potential;
		bool setFactorVar=false;
		for(INTINTMAP_ITER aIter=fVars.begin();aIter!=fVars.end();aIter++)
		{
			if(aIter->first==vId)
			{
				continue;
			}
			if(!setFactorVar)
			{
				setFactorVar=true;
				potFunc->setAssocVariable(varSet[aIter->first],Potential::FACTOR);
			}
			else
			{
				potFunc->setAssocVariable(varSet[aIter->first],Potential::MARKOV_BNKT);
			}
		}
		STRDBLMAP counts;
		potFunc->potZeroInit();
		populatePotential(potFunc,false);
		potFunc->calculateJointEntropy();
		partJointEntropy=potFunc->getJointEntropy();
		//jointEntropies[partConfStr]=partJointEntropy;
		delete potFunc;
	//}
	condEntropy=fullJointEntropy-partJointEntropy;
	//fullConfStr.clear();
	//partConfStr.clear();
	/*if(jointEntropies.size()>=20000)
	{
		jointEntropies.clear();
	}*/
	return condEntropy;
}