Beispiel #1
0
//Computes the inverse of the covariance matrix of a single cluster.
//
//vCovarMat : handed to CovarMat to be filled from the cluster's rows, then inverted in place by InvertMatrix
//vMean     : handed to CovarMat along with vCovarMat (presumably receives the per-dimension mean -- confirm against CovarMat)
//vClustIDs : cluster ID of each data vector; vClustIDs[i] labels row i of vFloat
//iClustID  : ID of the cluster to process
//vFloat    : row-major data, each row is a data vector and each column a dimension; data vector i starts at vFloat[i*iCols]
//iRows     : number of data vectors scanned in vFloat (assumed not to exceed vClustIDs.size())
//iCols     : number of dimensions per data vector
//iClustSz  : set to the number of entries of vClustIDs equal to iClustID
//
//Returns false when no entry of vClustIDs equals iClustID, otherwise whatever InvertMatrix returns.
bool Clust2CovarMatInv(A2D< ID_T >& vCovarMat,vector<ID_T>& vMean, vector<int>& vClustIDs,int iClustID,vector<double>& vFloat,int iRows,int iCols,int& iClustSz)
{
	int i = 0, j = 0;
	iClustSz = count(vClustIDs.begin(),vClustIDs.end(),iClustID);
	if(!iClustSz)
		return false;

	//gather the rows that belong to this cluster into a dense iClustSz x iCols matrix
	A2D<double> vClustData(iClustSz,iCols);
	vClustData.Fill(0.0);
	for(i=0;i<iRows;i++)
	{	if(vClustIDs[i]!=iClustID)
			continue;
		//iterator arithmetic instead of &vFloat[i*iCols+iCols]: for the last row that subscript
		//is one past the end of the vector, which operator[] does not allow
		vector<double>::iterator itRow = vFloat.begin() + i*iCols;
		copy(itRow,itRow+iCols,vClustData[j++]);
	}

	Write2LogPlain("clust %d sz=%d\n",iClustID,iClustSz);

	CovarMat(vClustData,iClustSz,iCols,vCovarMat,vMean);

#ifdef _DEBUG
	//debug builds keep a copy of the covariance matrix so that (matrix * inverse) can be logged
	//and checked against the identity matrix by eye
	A2D<ID_T> mtmp(vCovarMat);

	bool b = InvertMatrix(vCovarMat);

	Write2LogPlain("inv covar mat");
	for(i=0;i<vCovarMat.Rows();i++){
		for(j=0;j<vCovarMat.Cols();j++){
			Write2LogPlain("%g ",vCovarMat[i][j]);
		}
		Write2LogPlain("\n");
	}

	A2D<ID_T> vident;
	MatMult(mtmp,vCovarMat,vident);

	Write2LogPlain("is this identity matrix?");
	for(i=0;i<vident.Rows();i++){
		for(j=0;j<vident.Cols();j++){
			Write2LogPlain("%g ",vident[i][j]);
		}
		Write2LogPlain("\n");
	}

	return b;
#else
	return InvertMatrix(vCovarMat);
#endif
}
Beispiel #2
0
bool FindBest1DDims(vector<float>& vFloat,int iClusts,int iCols,int iBestDims,vector<int>& vCounts,vector<int>& vClustIDs,A2D<int>& vBestDims,A2D<prob_t>& vKLDivs,MY_STR_STACK& vAxes,const CUPDUPDATA* pUp)
{	vBestDims.Init(iClusts+1,iBestDims);
	vKLDivs.Init(iClusts+1,iBestDims);
	vKLDivs.Fill(-99999.9f);
	int iC = 1 , iRows = vClustIDs.size();
	double dJnk = 0.0;
	CString msg;
	for(iC=1;iC<=iClusts;iC++)
	{	int iD;
		for(iD=0;iD<iCols;iD++,dJnk++)
		{	msg.Format("Finding best %d dimensions for cluster %d of %d : Dim=%s",iBestDims,iC,iClusts,*vAxes[iD]);
			pUp->SetProgress(msg,100.0*dJnk/(iClusts*iCols));
			vector<float> v1DFloatClust(vCounts[iC]),v1DFloatComp(iRows-vCounts[iC]);
			int idxClust = 0, idxComp = 0;
			KDTreeHist o1DTClust,o1DTComp;
			int iV = 0;
			for(iV=0;iV<vClustIDs.size();iV++)
			{	if(vClustIDs[iV]==iC)
					v1DFloatClust[idxClust++]=vFloat[iV*iCols+iD];
				else
					v1DFloatComp[idxComp++]=vFloat[iV*iCols+iD];
			}
			o1DTClust.SetData(1,&v1DFloatClust[0],vCounts[iC]);
			o1DTComp.SetData(1,&v1DFloatComp[0],iRows-vCounts[iC]);
			prob_t kld = KLDivSym(o1DTClust,o1DTComp);
			int iJ;
			for(iJ=iBestDims-1;iJ>=0;iJ--)
			{	if(kld>vKLDivs[iC][iJ])
				{	int iJ2;
					for(iJ2=0;iJ2<iJ;iJ2++)
					{	vKLDivs[iC][iJ2]=vKLDivs[iC][iJ2+1];
						vBestDims[iC][iJ2]=vBestDims[iC][iJ2+1];
					}
					vKLDivs[iC][iJ]=kld;
					vBestDims[iC][iJ]=iD;
					break;
				}
			}
		}
	}
	return true;
}
Beispiel #3
0
//Replaces the square matrix vData with its inverse, computed by LU decomposition.
//
//The decomposition (NR::ludcmp) runs once on a scratch copy; the inverse is then built one
//column at a time by back-substituting (NR::lubksb) each unit vector.
//
//Returns false, leaving vData unchanged, if vData is not square or NR::ludcmp reports failure.
//Returns true once vData holds the inverse.
bool InvertMatrix(A2D<ID_T>& vData)
{
	const int N = vData.Rows();
	const int iCols = vData.Cols();

	//only a square matrix can be inverted; without this check the loops below would index
	//col and vData out of range whenever the row and column counts differ
	if(iCols!=N)
	{	Write2Log("Couldn't invert matrix! Not square: %d rows, %d cols",N,iCols);
		return false;
	}

	//decompose a copy so that vData survives a failed decomposition
	A2D<ID_T> vTmp(N,N);
	int i,j;
	for(i=0;i<N;i++)
		for(j=0;j<N;j++)
			vTmp[i][j]=vData[i][j];

	ID_T d;
	vector<int> indx(N);
	vector<ID_T> col(N);
	if(!NR::ludcmp(vTmp,indx,d))//Decompose the matrix just once.
	{	Write2Log("Couldn't invert matrix!");
		return false;
	}
	for(j=0;j<N;j++)
	{ //Find inverse by columns: solve for the j-th unit vector
		for(i=0;i<N;i++)
			col[i]=0.0;
		col[j]=1.0;
		NR::lubksb(vTmp,indx,&col[0]);
		for(i=0;i<N;i++)
			vData[i][j]=col[i];
	}

	return true;
}
Beispiel #4
0
//Matrix product: out = a * b.
//
//out is re-initialised to a.Rows() x b.Cols() and zero-filled before the product is accumulated.
//If either input has zero rows, or a.Cols() differs from b.Rows(), the function returns
//without touching out. Any exception raised along the way is caught and logged together
//with the dimensions read so far; it is not rethrown.
void MatMult(A2D<ID_T>& a, A2D<ID_T>& b,A2D<ID_T>& out)
{
	//declared outside the try block so the handler can report whatever was read before the failure
	int nRowsA = 0;
	int nColsA = 0;
	int nRowsB = 0;
	int nColsB = 0;
	try
	{
		nRowsA = a.Rows();
		if(nRowsA==0)
			return;
		nColsA = a.Cols();

		nRowsB = b.Rows();
		if(nRowsB==0)
			return;
		nColsB = b.Cols();

		//inner dimensions must agree
		if(nColsA!=nRowsB)
			return;

		out.Init(nRowsA,nColsB);
		out.Fill(0.0);

		int iRow,iCol,iInner;
		for(iRow=0;iRow<nRowsA;iRow++)
		{
			for(iCol=0;iCol<nColsB;iCol++)
			{
				for(iInner=0;iInner<nColsA;iInner++)
				{
					out[iRow][iCol] += a[iRow][iInner] * b[iInner][iCol];
				}
			}
		}
	}
	catch(...)
	{
		Write2Log("Exception in MatMult iRowsA=%d iColsA=%d iRowsB=%d iColsB=%d",nRowsA,nColsA,nRowsB,nColsB);
	}
}
Beispiel #5
0
//Chooses, for every cluster, a set of iBestDims dimensions by exhaustive search over all
//combinations of iBestDims out of iCols dimensions.
//
//Step 1: CovarMat is run once over all points to fill vCorrelMat (the trailing 'true'
//        presumably requests a correlation matrix, as the progress text says -- confirm against CovarMat).
//Step 2: per cluster and per dimension, the symmetric KL divergence (KLDivSym) between the
//        cluster's values and all other points' values in that dimension is computed with two
//        1D KDTreeHist objects.
//Step 3: every combination of iBestDims dimensions is scored with KLDCorVal(vCorrelMat,...) and
//        the highest-scoring combination is kept.
//Step 4: the chosen (kld,dimension) pairs are sorted ascending and written to row iC of
//        vKLDivs (kld values) and vBestDims (dimension indices).
//
//vFloat is row-major with iCols values per point, vClustIDs[i] is the cluster of point i, and
//vCounts[c] is used as the number of points in cluster c (the work buffers are sized from it).
//vAxes supplies dimension names for the progress text reported through pUp.
//
//Returns false without touching the outputs when no combination can be formed
//(iCols < 1, iBestDims < 0 or iBestDims > iCols); otherwise returns true.
bool FindBest1DDims(vector<float>& vFloat,int iClusts,int iCols,int iBestDims,vector<int>& vCounts,vector<int>& vClustIDs,A2D<int>& vBestDims,A2D<prob_t>& vKLDivs,MY_STR_STACK& vAxes,const CUPDUPDATA* pUp)
{	//the combination search below walks an array of iCols dimension indices split at iBestDims;
	//with these arguments that split point would lie outside the array
	if(iCols<1 || iBestDims<0 || iBestDims>iCols)
		return false;

	vBestDims.Init(iClusts+1,iBestDims);
	vKLDivs.Init(iClusts+1,iBestDims);
	vKLDivs.Fill(-99999.9f);
	int iC = 1 , iRows = vClustIDs.size();
	double dJnk = 0.0; //(cluster,dimension) pairs handled so far, drives the progress value
	CString msg;
	vector< vector<float> > vCorrelMat;
	vector<float> vMean;
	msg.Format("Computing %d X %d correlation matrix",iCols,iCols);
	pUp->SetProgress(msg);
	CovarMat(vFloat,vClustIDs.size(),iCols,vCorrelMat,vMean,true);
	for(iC=1;iC<=iClusts;iC++)
	{	int iD;
		//vKLDDims[d] = (kld of dimension d for this cluster, d)
		vector< pair<float, int> > vKLDDims(iCols);
		for(iD=0;iD<iCols;iD++,dJnk++)
		{	msg.Format("Finding best %d dimensions for cluster %d of %d : Dim=%s",iBestDims,iC,iClusts,*vAxes[iD]);
			pUp->SetProgress(msg,100.0*dJnk/(iClusts*iCols));
			//split this dimension's values into "inside the cluster" and "everything else"
			vector<float> v1DFloatClust(vCounts[iC]),v1DFloatComp(iRows-vCounts[iC]);
			int idxClust = 0, idxComp = 0;
			KDTreeHist o1DTClust,o1DTComp;
			int iV = 0;
			for(iV=0;iV<iRows;iV++)
			{	if(vClustIDs[iV]==iC)
					v1DFloatClust[idxClust++]=vFloat[iV*iCols+iD];
				else
					v1DFloatComp[idxComp++]=vFloat[iV*iCols+iD];
			}
			o1DTClust.SetData(1,&v1DFloatClust[0],vCounts[iC]);
			o1DTComp.SetData(1,&v1DFloatComp[0],iRows-vCounts[iC]);
			prob_t kld = KLDivSym(o1DTClust,o1DTComp);

			vKLDDims[iD]=pair<float,int>(kld,iD);
		}

		//dimIDs holds the dimension indices being permuted by the combination generator; its
		//first iBestDims entries are the current combination
		vector<int> dimIDs(iCols) , bestdimIDs;
		int iJ = 0;
		for(iJ=0;iJ<iCols;iJ++) dimIDs[iJ]=iJ;

		//range boundaries via pointer arithmetic: &dimIDs[iCols] (and &dimIDs[iBestDims] when
		//iBestDims==iCols) would subscript past the last element, which operator[] does not allow
		int* pFirst = &dimIDs[0];
		int* pMid = pFirst + iBestDims;
		int* pLast = pFirst + iCols;

		float maxScore = -10000.0f , tmpScore = 0.0f;
		btb::combination_init(pFirst,pMid,pLast);
		//fall back to the first combination so that a valid set of distinct dimensions is
		//reported even if no score exceeds the initial maxScore
		bestdimIDs = dimIDs;
		do
		{	vector< pair<float,int> > vKLDTmp(iBestDims);
			int iK = 0;
			for(;iK<iBestDims;iK++)
				vKLDTmp[iK]=vKLDDims[dimIDs[iK]];
			if((tmpScore=KLDCorVal(vCorrelMat,vKLDTmp))>maxScore)
			{   maxScore = tmpScore;
				bestdimIDs=dimIDs;
				Write2Log("maxScore=%.2f",maxScore);
				WriteVec2Log(bestdimIDs);
			}

		}while(btb::next_combination(pFirst,pMid,pLast));

		//report the winning combination, ordered by ascending kld
		vector< pair<float,int> > vKLDBest(iBestDims);
		for(iJ=0;iJ<iBestDims;iJ++) vKLDBest[iJ] = vKLDDims[bestdimIDs[iJ]];
		sort(vKLDBest.begin(),vKLDBest.end());
		for(iJ=0;iJ<iBestDims;iJ++)
		{	vBestDims[iC][iJ]=vKLDBest[iJ].second;
			vKLDivs[iC][iJ]=vKLDBest[iJ].first;
		}
	}
	return true;
}
Beispiel #6
0
bool FindBest2DDims(vector<float>& vFloat,vector<float>& vRange,int iClusts,int iCols,int iBestDims,vector<int>& vCounts,vector<int>& vClustIDs,A2D<int>& vBestDims,A2D<KLD2D>& vKLDivs,MY_STR_STACK& vAxes,const CUPDUPDATA* pUp)
{	vBestDims.Init(iClusts+1,iBestDims);//each cluster will get iBestDims to perform multidimensional kldiv on later
	bool bInit = false;
	//vKLDivs.Init(iClusts+1,iBestDims);
	int iC = 1 , iRows = vClustIDs.size() , iTot = iClusts*IntegerSum(iCols-1);
	double dJnk = 0.0;
	const float fMinRange = 0.009; //min range for a dimension to be usable
	const float fMaxRange = 1e7; //max range for a dimension to be usable
	CString msg;
	for(iC=1;iC<=iClusts;iC++)
	{	
		vector<KLD2D> vKLDivTmp(IntegerSum(iCols-1));
		int iD1,iD2, iK = 0;
		for(iD1=0;iD1<iCols;iD1++)
		{	
			for(iD2=iD1+1;iD2<iCols;iD2++,dJnk++)
			{
				if(SkipPair(iD1,iD2)) continue;

				//exclude 2D slice consisting of empty signal -- occurs when a wire is grounded
				//note index+1 -- this is because of CVertex using index 0 as # of clusters vertex belongs to
				//so all dimensions are offset by 1 -- see CVerxStack::GetFloatV for more details, that's
				//where vRange is initialized
				//also exclude dimensions where the range is so huge that its likely to be noise, this rarely
				//happens but when it does it can produce spurious results by forcing together very tightly points
				//that shouldn't be so close
				if(vRange[iD1+1]<fMinRange || vRange[iD2+1]<fMinRange || vRange[iD1+1]>fMaxRange || vRange[iD2+1]>fMaxRange)
				{	Write2Log("Skipping slice %s %s with ranges %.12f %.12f",*vAxes[iD1],*vAxes[iD2],vRange[iD1+1],vRange[iD2+1]);
					continue;
				}

#ifdef _DEBUG
				if(iD1==2 && iC==4)// && iD2==17)
				{
					int moo=0;
				}
#endif
				msg.Format("Finding best %d dimensions for cluster %d of %d : Dim1=%s Dim2=%s",iBestDims,iC,iClusts,*vAxes[iD1],*vAxes[iD2]);
				pUp->SetProgress(msg,100.0*dJnk/iTot);
				vector<float> v2DFloatClust(2*vCounts[iC]),v2DFloatComp(2*(iRows-vCounts[iC]));
				int idxClust = 0, idxComp = 0;
				KDTreeHist o2DTClust,o2DTComp;
				int iV = 0;
				for(iV=0;iV<vClustIDs.size();iV++)
				{	//initialize the trees
					if(vClustIDs[iV]==iC)
					{	v2DFloatClust[idxClust++]=vFloat[iV*iCols+iD1];
						v2DFloatClust[idxClust++]=vFloat[iV*iCols+iD2];
					}
					else
					{	v2DFloatComp[idxComp++]=vFloat[iV*iCols+iD1];
						v2DFloatComp[idxComp++]=vFloat[iV*iCols+iD2];
					}
				}
				o2DTClust.SetData(2,&v2DFloatClust[0],vCounts[iC]);
				o2DTComp.SetData(2,&v2DFloatComp[0],iRows-vCounts[iC]);
				prob_t kld = KLDivSym(o2DTClust,o2DTComp); //compute kld
				//Write2Log("2D kld C%d dims:%s %s = %.4f",iC,*vAxes[iD1],*vAxes[iD2],kld);
				vKLDivTmp[iK++].Init(iD1,iD2,kld);//store kldiv
			}
		}
		vKLDivTmp.resize(iK);
		//sort results by kldiv values
		sort(vKLDivTmp.begin(),vKLDivTmp.end());

		if(!bInit)
		{
			vKLDivs.Init(iClusts+1,iK); // init here
			bInit=true;
		}

		copy(vKLDivTmp.begin(),vKLDivTmp.end(),&vKLDivs[iC][0]); // copy for later use

		//go through results, picking out top 8 dimensions, make sure not to pick
		//the same dimension twice
		int iFound = 0;
		set<int> sDims; // stores dimensions already picked
		int idx = iK-1, jdx; // start at best kldiv pair
		set< pair<float,int> > vKD1D;
		for(;idx>=0 && iFound<iBestDims;idx--)
		{	
			bool bHas1=sDims.find(vKLDivTmp[idx].m_iD1)!=sDims.end() || HasSkipPair(sDims,vKLDivTmp[idx].m_iD1),
				bHas2=sDims.find(vKLDivTmp[idx].m_iD2)!=sDims.end() || HasSkipPair(sDims,vKLDivTmp[idx].m_iD2);
			if(iFound+2<=iBestDims)
			{	//can add both
				if(!bHas1)
				{	sDims.insert(vKLDivTmp[idx].m_iD1);	//store which dims we already have
					vBestDims[iC][iBestDims-iFound-1]=vKLDivTmp[idx].m_iD1;
					iFound++;
				}
				if(!bHas2)
				{	sDims.insert(vKLDivTmp[idx].m_iD2);
					vBestDims[iC][iBestDims-iFound-1]=vKLDivTmp[idx].m_iD2;
					iFound++;
				}
			}
			else if(iFound+1<=iBestDims)
			{	//can add only 1
				if(bHas1 && !bHas2)
				{	sDims.insert(vKLDivTmp[idx].m_iD2);
					vBestDims[iC][iBestDims-iFound-1]=vKLDivTmp[idx].m_iD2;
					iFound++;
				}
				else if(bHas2 && !bHas1)
				{	sDims.insert(vKLDivTmp[idx].m_iD1);	//store which dims we already have
					vBestDims[iC][iBestDims-iFound-1]=vKLDivTmp[idx].m_iD1;
					iFound++;
				}
				else if(!bHas1)
				{	sDims.insert(vKLDivTmp[idx].m_iD1);	//store which dims we already have
					vBestDims[iC][iBestDims-iFound-1]=vKLDivTmp[idx].m_iD1;
					iFound++;
				}
				else if(!bHas2) // can't ever go here
				{	sDims.insert(vKLDivTmp[idx].m_iD2);
					vBestDims[iC][iBestDims-iFound-1]=vKLDivTmp[idx].m_iD2;
					iFound++;
				}
				/*               fun with truth tables
						has1   has2  !has1  !has2   has1 && !has2   has2 && !has1
						true   true  false  false       false          false
						true  false  false  true        true           false
						false  true  true   false       false          true
						false false  true   true        false          false
				*/
			}
		}
#if 0
		Write2Log("\nClust%d 2D kldiv pairs(best 16) info follows:\n",iC);
		LogF F;
		FILE* fp = F.Open();
		int y=iK-16>=0?iK-16:0;
		for(;y<iK;y++)
		{	fprintf(fp,"pair%d D1=%s D2=%s kld=%.4f\n",
				y,*vAxes[vKLDivTmp[y].m_iD1],*vAxes[vKLDivTmp[y].m_iD2],vKLDivTmp[y].m_kld);
		} fprintf(fp,"\n\n");
#endif
	}
	return true;
}