Пример #1
0
bool FindBest1DDims(vector<float>& vFloat,int iClusts,int iCols,int iBestDims,vector<int>& vCounts,vector<int>& vClustIDs,A2D<int>& vBestDims,A2D<prob_t>& vKLDivs,MY_STR_STACK& vAxes,const CUPDUPDATA* pUp)
{	vBestDims.Init(iClusts+1,iBestDims);
	vKLDivs.Init(iClusts+1,iBestDims);
	vKLDivs.Fill(-99999.9f);
	int iC = 1 , iRows = vClustIDs.size();
	double dJnk = 0.0;
	CString msg;
	for(iC=1;iC<=iClusts;iC++)
	{	int iD;
		for(iD=0;iD<iCols;iD++,dJnk++)
		{	msg.Format("Finding best %d dimensions for cluster %d of %d : Dim=%s",iBestDims,iC,iClusts,*vAxes[iD]);
			pUp->SetProgress(msg,100.0*dJnk/(iClusts*iCols));
			vector<float> v1DFloatClust(vCounts[iC]),v1DFloatComp(iRows-vCounts[iC]);
			int idxClust = 0, idxComp = 0;
			KDTreeHist o1DTClust,o1DTComp;
			int iV = 0;
			for(iV=0;iV<vClustIDs.size();iV++)
			{	if(vClustIDs[iV]==iC)
					v1DFloatClust[idxClust++]=vFloat[iV*iCols+iD];
				else
					v1DFloatComp[idxComp++]=vFloat[iV*iCols+iD];
			}
			o1DTClust.SetData(1,&v1DFloatClust[0],vCounts[iC]);
			o1DTComp.SetData(1,&v1DFloatComp[0],iRows-vCounts[iC]);
			prob_t kld = KLDivSym(o1DTClust,o1DTComp);
			int iJ;
			for(iJ=iBestDims-1;iJ>=0;iJ--)
			{	if(kld>vKLDivs[iC][iJ])
				{	int iJ2;
					for(iJ2=0;iJ2<iJ;iJ2++)
					{	vKLDivs[iC][iJ2]=vKLDivs[iC][iJ2+1];
						vBestDims[iC][iJ2]=vBestDims[iC][iJ2+1];
					}
					vKLDivs[iC][iJ]=kld;
					vBestDims[iC][iJ]=iD;
					break;
				}
			}
		}
	}
	return true;
}
Пример #2
0
void MatMult(A2D<ID_T>& a, A2D<ID_T>& b,A2D<ID_T>& out)
{	int iRowsA = 0, iColsA = 0, iRowsB = 0 , iColsB = 0;
	try
	{
		iRowsA = a.Rows(); if(!iRowsA) return; iColsA = a.Cols();
		iRowsB = b.Rows(); if(!iRowsB) return; iColsB = b.Cols();
		if(iColsA!=iRowsB)
			return;
		out.Init(iRowsA,iColsB); out.Fill(0.0);
		int i,j,k;
		for(i=0;i<iRowsA;i++)
			for(j=0;j<iColsB;j++)
				for(k=0;k<iColsA;k++)
					out[i][j] += a[i][k] * b[k][j];
	}
	catch(...)
	{
		Write2Log("Exception in MatMult iRowsA=%d iColsA=%d iRowsB=%d iColsB=%d",iRowsA,iColsA,iRowsB,iColsB);
	}
}
Пример #3
0
bool FindBest1DDims(vector<float>& vFloat,int iClusts,int iCols,int iBestDims,vector<int>& vCounts,vector<int>& vClustIDs,A2D<int>& vBestDims,A2D<prob_t>& vKLDivs,MY_STR_STACK& vAxes,const CUPDUPDATA* pUp)
{	vBestDims.Init(iClusts+1,iBestDims);
	vKLDivs.Init(iClusts+1,iBestDims);
	vKLDivs.Fill(-99999.9f);
	int iC = 1 , iRows = vClustIDs.size();
	double dJnk = 0.0;
	CString msg;
	vector< vector<float> > vCorrelMat;
	vector<float> vMean;
	msg.Format("Computing %d X %d correlation matrix",iCols,iCols);
	pUp->SetProgress(msg);
	CovarMat(vFloat,vClustIDs.size(),iCols,vCorrelMat,vMean,true);
	for(iC=1;iC<=iClusts;iC++)
	{	int iD;
		vector< pair<float, int> > vKLDDims(iCols);
		for(iD=0;iD<iCols;iD++,dJnk++)
		{	msg.Format("Finding best %d dimensions for cluster %d of %d : Dim=%s",iBestDims,iC,iClusts,*vAxes[iD]);
			pUp->SetProgress(msg,100.0*dJnk/(iClusts*iCols));
			vector<float> v1DFloatClust(vCounts[iC]),v1DFloatComp(iRows-vCounts[iC]);
			int idxClust = 0, idxComp = 0;
			KDTreeHist o1DTClust,o1DTComp;
			int iV = 0;
			for(iV=0;iV<vClustIDs.size();iV++)
			{	if(vClustIDs[iV]==iC)
					v1DFloatClust[idxClust++]=vFloat[iV*iCols+iD];
				else
					v1DFloatComp[idxComp++]=vFloat[iV*iCols+iD];
			}
			o1DTClust.SetData(1,&v1DFloatClust[0],vCounts[iC]);
			o1DTComp.SetData(1,&v1DFloatComp[0],iRows-vCounts[iC]);
			prob_t kld = KLDivSym(o1DTClust,o1DTComp);
			
			vKLDDims[iD]=pair<float,int>(kld,iD);
			/*pair<float,int> oP(kld,iD);
			if(vKLDDims.size()<iBestDims)
				vKLDDims.push_back(oP);
			else
			{	int idx = GetReplaceIndex(vCorrelMat,vKLDDims,oP);
				if(idx != -1)
					vKLDDims[idx]=oP;
			}*/
		}
		/*sort(vKLDTmp.begin(),vKLDTmp.end());
		int iJ , idx = iBestDims-1, iFound=0;
		for(iJ=iCols-1;iJ>=0;iJ--)
		{
			int iK;
			bool bRedund = false;
			for(iK=iBestDims-1;iK>=idx;iK--)
			{
				if(SkipPair(vBestDims[iC][iK],vKLDTmp[iJ].second))
				{
					bRedund = true;
					break;
				}
			}
			if(!bRedund)
			{
				vBestDims[iC][idx]=vKLDTmp[iJ].second;
				vKLDivs[iC][idx]=vKLDTmp[iJ].first;
				idx--;
				iFound++;
			}
			if(iFound==iBestDims)
				break;
		}*/
		vector<int> dimIDs(iCols) , bestdimIDs(iCols);
		int iJ = 0;
		for(iJ=0;iJ<iCols;iJ++) dimIDs[iJ]=iJ;
		//LogF F; FILE* fp=F.Open();
		float maxScore = -10000.0 , tmpScore = 0.0;
		btb::combination_init(&dimIDs[0],&dimIDs[iBestDims],&dimIDs[dimIDs.size()]);
		do
		{	vector< pair<float,int> > vKLDTmp(iBestDims);
			int iK = 0;
			for(;iK<iBestDims;iK++)
			{
				vKLDTmp[iK]=vKLDDims[dimIDs[iK]];
				//fprintf(fp,"%d\t",dimIDs[iK]);
			} //fprintf(fp,"\n");
			if((tmpScore=KLDCorVal(vCorrelMat,vKLDTmp))>maxScore)
			{   maxScore = tmpScore;
				bestdimIDs=dimIDs;
				Write2Log("maxScore=%.2f",maxScore);
				WriteVec2Log(bestdimIDs);
			}

		}while(btb::next_combination(&dimIDs[0], &dimIDs[iBestDims], &dimIDs[dimIDs.size()]));
		//F.Close();
		vector< pair<float,int> > vKLDTmp(iBestDims);
		for(iJ=0;iJ<iBestDims;iJ++) vKLDTmp[iJ] = vKLDDims[bestdimIDs[iJ]];
		vKLDDims=vKLDTmp;
		sort(vKLDDims.begin(),vKLDDims.end());
		for(iJ=0;iJ<vKLDDims.size();iJ++)
		{	vBestDims[iC][iJ]=vKLDDims[iJ].second;
			vKLDivs[iC][iJ]=vKLDDims[iJ].first;
		}
	}
	return true;
}
Пример #4
0
bool FindBest2DDims(vector<float>& vFloat,vector<float>& vRange,int iClusts,int iCols,int iBestDims,vector<int>& vCounts,vector<int>& vClustIDs,A2D<int>& vBestDims,A2D<KLD2D>& vKLDivs,MY_STR_STACK& vAxes,const CUPDUPDATA* pUp)
{	vBestDims.Init(iClusts+1,iBestDims);//each cluster will get iBestDims to perform multidimensional kldiv on later
	bool bInit = false;
	//vKLDivs.Init(iClusts+1,iBestDims);
	int iC = 1 , iRows = vClustIDs.size() , iTot = iClusts*IntegerSum(iCols-1);
	double dJnk = 0.0;
	const float fMinRange = 0.009; //min range for a dimension to be usable
	const float fMaxRange = 1e7; //max range for a dimension to be usable
	CString msg;
	for(iC=1;iC<=iClusts;iC++)
	{	
		vector<KLD2D> vKLDivTmp(IntegerSum(iCols-1));
		int iD1,iD2, iK = 0;
		for(iD1=0;iD1<iCols;iD1++)
		{	
			for(iD2=iD1+1;iD2<iCols;iD2++,dJnk++)
			{
				if(SkipPair(iD1,iD2)) continue;

				//exclude 2D slice consisting of empty signal -- occurs when a wire is grounded
				//note index+1 -- this is because of CVertex using index 0 as # of clusters vertex belongs to
				//so all dimensions are offset by 1 -- see CVerxStack::GetFloatV for more details, that's
				//where vRange is initialized
				//also exclude dimensions where the range is so huge that its likely to be noise, this rarely
				//happens but when it does it can produce spurious results by forcing together very tightly points
				//that shouldn't be so close
				if(vRange[iD1+1]<fMinRange || vRange[iD2+1]<fMinRange || vRange[iD1+1]>fMaxRange || vRange[iD2+1]>fMaxRange)
				{	Write2Log("Skipping slice %s %s with ranges %.12f %.12f",*vAxes[iD1],*vAxes[iD2],vRange[iD1+1],vRange[iD2+1]);
					continue;
				}

#ifdef _DEBUG
				if(iD1==2 && iC==4)// && iD2==17)
				{
					int moo=0;
				}
#endif
				msg.Format("Finding best %d dimensions for cluster %d of %d : Dim1=%s Dim2=%s",iBestDims,iC,iClusts,*vAxes[iD1],*vAxes[iD2]);
				pUp->SetProgress(msg,100.0*dJnk/iTot);
				vector<float> v2DFloatClust(2*vCounts[iC]),v2DFloatComp(2*(iRows-vCounts[iC]));
				int idxClust = 0, idxComp = 0;
				KDTreeHist o2DTClust,o2DTComp;
				int iV = 0;
				for(iV=0;iV<vClustIDs.size();iV++)
				{	//initialize the trees
					if(vClustIDs[iV]==iC)
					{	v2DFloatClust[idxClust++]=vFloat[iV*iCols+iD1];
						v2DFloatClust[idxClust++]=vFloat[iV*iCols+iD2];
					}
					else
					{	v2DFloatComp[idxComp++]=vFloat[iV*iCols+iD1];
						v2DFloatComp[idxComp++]=vFloat[iV*iCols+iD2];
					}
				}
				o2DTClust.SetData(2,&v2DFloatClust[0],vCounts[iC]);
				o2DTComp.SetData(2,&v2DFloatComp[0],iRows-vCounts[iC]);
				prob_t kld = KLDivSym(o2DTClust,o2DTComp); //compute kld
				//Write2Log("2D kld C%d dims:%s %s = %.4f",iC,*vAxes[iD1],*vAxes[iD2],kld);
				vKLDivTmp[iK++].Init(iD1,iD2,kld);//store kldiv
			}
		}
		vKLDivTmp.resize(iK);
		//sort results by kldiv values
		sort(vKLDivTmp.begin(),vKLDivTmp.end());

		if(!bInit)
		{
			vKLDivs.Init(iClusts+1,iK); // init here
			bInit=true;
		}

		copy(vKLDivTmp.begin(),vKLDivTmp.end(),&vKLDivs[iC][0]); // copy for later use

		//go through results, picking out top 8 dimensions, make sure not to pick
		//the same dimension twice
		int iFound = 0;
		set<int> sDims; // stores dimensions already picked
		int idx = iK-1, jdx; // start at best kldiv pair
		set< pair<float,int> > vKD1D;
		for(;idx>=0 && iFound<iBestDims;idx--)
		{	
			bool bHas1=sDims.find(vKLDivTmp[idx].m_iD1)!=sDims.end() || HasSkipPair(sDims,vKLDivTmp[idx].m_iD1),
				bHas2=sDims.find(vKLDivTmp[idx].m_iD2)!=sDims.end() || HasSkipPair(sDims,vKLDivTmp[idx].m_iD2);
			if(iFound+2<=iBestDims)
			{	//can add both
				if(!bHas1)
				{	sDims.insert(vKLDivTmp[idx].m_iD1);	//store which dims we already have
					vBestDims[iC][iBestDims-iFound-1]=vKLDivTmp[idx].m_iD1;
					iFound++;
				}
				if(!bHas2)
				{	sDims.insert(vKLDivTmp[idx].m_iD2);
					vBestDims[iC][iBestDims-iFound-1]=vKLDivTmp[idx].m_iD2;
					iFound++;
				}
			}
			else if(iFound+1<=iBestDims)
			{	//can add only 1
				if(bHas1 && !bHas2)
				{	sDims.insert(vKLDivTmp[idx].m_iD2);
					vBestDims[iC][iBestDims-iFound-1]=vKLDivTmp[idx].m_iD2;
					iFound++;
				}
				else if(bHas2 && !bHas1)
				{	sDims.insert(vKLDivTmp[idx].m_iD1);	//store which dims we already have
					vBestDims[iC][iBestDims-iFound-1]=vKLDivTmp[idx].m_iD1;
					iFound++;
				}
				else if(!bHas1)
				{	sDims.insert(vKLDivTmp[idx].m_iD1);	//store which dims we already have
					vBestDims[iC][iBestDims-iFound-1]=vKLDivTmp[idx].m_iD1;
					iFound++;
				}
				else if(!bHas2) // can't ever go here
				{	sDims.insert(vKLDivTmp[idx].m_iD2);
					vBestDims[iC][iBestDims-iFound-1]=vKLDivTmp[idx].m_iD2;
					iFound++;
				}
				/*               fun with truth tables
						has1   has2  !has1  !has2   has1 && !has2   has2 && !has1
						true   true  false  false       false          false
						true  false  false  true        true           false
						false  true  true   false       false          true
						false false  true   true        false          false
				*/
			}
		}
#if 0
		Write2Log("\nClust%d 2D kldiv pairs(best 16) info follows:\n",iC);
		LogF F;
		FILE* fp = F.Open();
		int y=iK-16>=0?iK-16:0;
		for(;y<iK;y++)
		{	fprintf(fp,"pair%d D1=%s D2=%s kld=%.4f\n",
				y,*vAxes[vKLDivTmp[y].m_iD1],*vAxes[vKLDivTmp[y].m_iD2],vKLDivTmp[y].m_kld);
		} fprintf(fp,"\n\n");
#endif
	}
	return true;
}