bool FindBest1DDims(vector<float>& vFloat,int iClusts,int iCols,int iBestDims,vector<int>& vCounts,vector<int>& vClustIDs,A2D<int>& vBestDims,A2D<prob_t>& vKLDivs,MY_STR_STACK& vAxes,const CUPDUPDATA* pUp) { vBestDims.Init(iClusts+1,iBestDims); vKLDivs.Init(iClusts+1,iBestDims); vKLDivs.Fill(-99999.9f); int iC = 1 , iRows = vClustIDs.size(); double dJnk = 0.0; CString msg; for(iC=1;iC<=iClusts;iC++) { int iD; for(iD=0;iD<iCols;iD++,dJnk++) { msg.Format("Finding best %d dimensions for cluster %d of %d : Dim=%s",iBestDims,iC,iClusts,*vAxes[iD]); pUp->SetProgress(msg,100.0*dJnk/(iClusts*iCols)); vector<float> v1DFloatClust(vCounts[iC]),v1DFloatComp(iRows-vCounts[iC]); int idxClust = 0, idxComp = 0; KDTreeHist o1DTClust,o1DTComp; int iV = 0; for(iV=0;iV<vClustIDs.size();iV++) { if(vClustIDs[iV]==iC) v1DFloatClust[idxClust++]=vFloat[iV*iCols+iD]; else v1DFloatComp[idxComp++]=vFloat[iV*iCols+iD]; } o1DTClust.SetData(1,&v1DFloatClust[0],vCounts[iC]); o1DTComp.SetData(1,&v1DFloatComp[0],iRows-vCounts[iC]); prob_t kld = KLDivSym(o1DTClust,o1DTComp); int iJ; for(iJ=iBestDims-1;iJ>=0;iJ--) { if(kld>vKLDivs[iC][iJ]) { int iJ2; for(iJ2=0;iJ2<iJ;iJ2++) { vKLDivs[iC][iJ2]=vKLDivs[iC][iJ2+1]; vBestDims[iC][iJ2]=vBestDims[iC][iJ2+1]; } vKLDivs[iC][iJ]=kld; vBestDims[iC][iJ]=iD; break; } } } } return true; }
// Matrix product: out = a * b.
//
// out is re-initialised to a.Rows() x b.Cols() and zero-filled before accumulation.
// The function returns without touching out when a or b has no rows, or when the
// column count of a differs from the row count of b.
// Any exception raised along the way is caught and written to the log together with
// whichever dimensions had been read so far; nothing is rethrown.
void MatMult(A2D<ID_T>& a, A2D<ID_T>& b,A2D<ID_T>& out)
{
	// kept outside the try block so the handler can still report them
	int nRowsA = 0, nColsA = 0, nRowsB = 0, nColsB = 0;

	try
	{
		nRowsA = a.Rows();
		if(nRowsA==0)
			return;
		nColsA = a.Cols();

		nRowsB = b.Rows();
		if(nRowsB==0)
			return;
		nColsB = b.Cols();

		// inner dimensions have to agree
		if(nColsA!=nRowsB)
			return;

		out.Init(nRowsA,nColsB);
		out.Fill(0.0);

		for(int r=0;r<nRowsA;r++)
		{
			for(int c=0;c<nColsB;c++)
			{
				for(int t=0;t<nColsA;t++)
				{
					out[r][c] += a[r][t] * b[t][c];
				}
			}
		}
	}
	catch(...)
	{
		Write2Log("Exception in MatMult iRowsA=%d iColsA=%d iRowsB=%d iColsB=%d",nRowsA,nColsA,nRowsB,nColsB);
	}
}
bool FindBest1DDims(vector<float>& vFloat,int iClusts,int iCols,int iBestDims,vector<int>& vCounts,vector<int>& vClustIDs,A2D<int>& vBestDims,A2D<prob_t>& vKLDivs,MY_STR_STACK& vAxes,const CUPDUPDATA* pUp) { vBestDims.Init(iClusts+1,iBestDims); vKLDivs.Init(iClusts+1,iBestDims); vKLDivs.Fill(-99999.9f); int iC = 1 , iRows = vClustIDs.size(); double dJnk = 0.0; CString msg; vector< vector<float> > vCorrelMat; vector<float> vMean; msg.Format("Computing %d X %d correlation matrix",iCols,iCols); pUp->SetProgress(msg); CovarMat(vFloat,vClustIDs.size(),iCols,vCorrelMat,vMean,true); for(iC=1;iC<=iClusts;iC++) { int iD; vector< pair<float, int> > vKLDDims(iCols); for(iD=0;iD<iCols;iD++,dJnk++) { msg.Format("Finding best %d dimensions for cluster %d of %d : Dim=%s",iBestDims,iC,iClusts,*vAxes[iD]); pUp->SetProgress(msg,100.0*dJnk/(iClusts*iCols)); vector<float> v1DFloatClust(vCounts[iC]),v1DFloatComp(iRows-vCounts[iC]); int idxClust = 0, idxComp = 0; KDTreeHist o1DTClust,o1DTComp; int iV = 0; for(iV=0;iV<vClustIDs.size();iV++) { if(vClustIDs[iV]==iC) v1DFloatClust[idxClust++]=vFloat[iV*iCols+iD]; else v1DFloatComp[idxComp++]=vFloat[iV*iCols+iD]; } o1DTClust.SetData(1,&v1DFloatClust[0],vCounts[iC]); o1DTComp.SetData(1,&v1DFloatComp[0],iRows-vCounts[iC]); prob_t kld = KLDivSym(o1DTClust,o1DTComp); vKLDDims[iD]=pair<float,int>(kld,iD); /*pair<float,int> oP(kld,iD); if(vKLDDims.size()<iBestDims) vKLDDims.push_back(oP); else { int idx = GetReplaceIndex(vCorrelMat,vKLDDims,oP); if(idx != -1) vKLDDims[idx]=oP; }*/ } /*sort(vKLDTmp.begin(),vKLDTmp.end()); int iJ , idx = iBestDims-1, iFound=0; for(iJ=iCols-1;iJ>=0;iJ--) { int iK; bool bRedund = false; for(iK=iBestDims-1;iK>=idx;iK--) { if(SkipPair(vBestDims[iC][iK],vKLDTmp[iJ].second)) { bRedund = true; break; } } if(!bRedund) { vBestDims[iC][idx]=vKLDTmp[iJ].second; vKLDivs[iC][idx]=vKLDTmp[iJ].first; idx--; iFound++; } if(iFound==iBestDims) break; }*/ vector<int> dimIDs(iCols) , bestdimIDs(iCols); int iJ = 0; 
for(iJ=0;iJ<iCols;iJ++) dimIDs[iJ]=iJ; //LogF F; FILE* fp=F.Open(); float maxScore = -10000.0 , tmpScore = 0.0; btb::combination_init(&dimIDs[0],&dimIDs[iBestDims],&dimIDs[dimIDs.size()]); do { vector< pair<float,int> > vKLDTmp(iBestDims); int iK = 0; for(;iK<iBestDims;iK++) { vKLDTmp[iK]=vKLDDims[dimIDs[iK]]; //fprintf(fp,"%d\t",dimIDs[iK]); } //fprintf(fp,"\n"); if((tmpScore=KLDCorVal(vCorrelMat,vKLDTmp))>maxScore) { maxScore = tmpScore; bestdimIDs=dimIDs; Write2Log("maxScore=%.2f",maxScore); WriteVec2Log(bestdimIDs); } }while(btb::next_combination(&dimIDs[0], &dimIDs[iBestDims], &dimIDs[dimIDs.size()])); //F.Close(); vector< pair<float,int> > vKLDTmp(iBestDims); for(iJ=0;iJ<iBestDims;iJ++) vKLDTmp[iJ] = vKLDDims[bestdimIDs[iJ]]; vKLDDims=vKLDTmp; sort(vKLDDims.begin(),vKLDDims.end()); for(iJ=0;iJ<vKLDDims.size();iJ++) { vBestDims[iC][iJ]=vKLDDims[iJ].second; vKLDivs[iC][iJ]=vKLDDims[iJ].first; } } return true; }
bool FindBest2DDims(vector<float>& vFloat,vector<float>& vRange,int iClusts,int iCols,int iBestDims,vector<int>& vCounts,vector<int>& vClustIDs,A2D<int>& vBestDims,A2D<KLD2D>& vKLDivs,MY_STR_STACK& vAxes,const CUPDUPDATA* pUp) { vBestDims.Init(iClusts+1,iBestDims);//each cluster will get iBestDims to perform multidimensional kldiv on later bool bInit = false; //vKLDivs.Init(iClusts+1,iBestDims); int iC = 1 , iRows = vClustIDs.size() , iTot = iClusts*IntegerSum(iCols-1); double dJnk = 0.0; const float fMinRange = 0.009; //min range for a dimension to be usable const float fMaxRange = 1e7; //max range for a dimension to be usable CString msg; for(iC=1;iC<=iClusts;iC++) { vector<KLD2D> vKLDivTmp(IntegerSum(iCols-1)); int iD1,iD2, iK = 0; for(iD1=0;iD1<iCols;iD1++) { for(iD2=iD1+1;iD2<iCols;iD2++,dJnk++) { if(SkipPair(iD1,iD2)) continue; //exclude 2D slice consisting of empty signal -- occurs when a wire is grounded //note index+1 -- this is because of CVertex using index 0 as # of clusters vertex belongs to //so all dimensions are offset by 1 -- see CVerxStack::GetFloatV for more details, that's //where vRange is initialized //also exclude dimensions where the range is so huge that its likely to be noise, this rarely //happens but when it does it can produce spurious results by forcing together very tightly points //that shouldn't be so close if(vRange[iD1+1]<fMinRange || vRange[iD2+1]<fMinRange || vRange[iD1+1]>fMaxRange || vRange[iD2+1]>fMaxRange) { Write2Log("Skipping slice %s %s with ranges %.12f %.12f",*vAxes[iD1],*vAxes[iD2],vRange[iD1+1],vRange[iD2+1]); continue; } #ifdef _DEBUG if(iD1==2 && iC==4)// && iD2==17) { int moo=0; } #endif msg.Format("Finding best %d dimensions for cluster %d of %d : Dim1=%s Dim2=%s",iBestDims,iC,iClusts,*vAxes[iD1],*vAxes[iD2]); pUp->SetProgress(msg,100.0*dJnk/iTot); vector<float> v2DFloatClust(2*vCounts[iC]),v2DFloatComp(2*(iRows-vCounts[iC])); int idxClust = 0, idxComp = 0; KDTreeHist o2DTClust,o2DTComp; int iV = 0; 
for(iV=0;iV<vClustIDs.size();iV++) { //initialize the trees if(vClustIDs[iV]==iC) { v2DFloatClust[idxClust++]=vFloat[iV*iCols+iD1]; v2DFloatClust[idxClust++]=vFloat[iV*iCols+iD2]; } else { v2DFloatComp[idxComp++]=vFloat[iV*iCols+iD1]; v2DFloatComp[idxComp++]=vFloat[iV*iCols+iD2]; } } o2DTClust.SetData(2,&v2DFloatClust[0],vCounts[iC]); o2DTComp.SetData(2,&v2DFloatComp[0],iRows-vCounts[iC]); prob_t kld = KLDivSym(o2DTClust,o2DTComp); //compute kld //Write2Log("2D kld C%d dims:%s %s = %.4f",iC,*vAxes[iD1],*vAxes[iD2],kld); vKLDivTmp[iK++].Init(iD1,iD2,kld);//store kldiv } } vKLDivTmp.resize(iK); //sort results by kldiv values sort(vKLDivTmp.begin(),vKLDivTmp.end()); if(!bInit) { vKLDivs.Init(iClusts+1,iK); // init here bInit=true; } copy(vKLDivTmp.begin(),vKLDivTmp.end(),&vKLDivs[iC][0]); // copy for later use //go through results, picking out top 8 dimensions, make sure not to pick //the same dimension twice int iFound = 0; set<int> sDims; // stores dimensions already picked int idx = iK-1, jdx; // start at best kldiv pair set< pair<float,int> > vKD1D; for(;idx>=0 && iFound<iBestDims;idx--) { bool bHas1=sDims.find(vKLDivTmp[idx].m_iD1)!=sDims.end() || HasSkipPair(sDims,vKLDivTmp[idx].m_iD1), bHas2=sDims.find(vKLDivTmp[idx].m_iD2)!=sDims.end() || HasSkipPair(sDims,vKLDivTmp[idx].m_iD2); if(iFound+2<=iBestDims) { //can add both if(!bHas1) { sDims.insert(vKLDivTmp[idx].m_iD1); //store which dims we already have vBestDims[iC][iBestDims-iFound-1]=vKLDivTmp[idx].m_iD1; iFound++; } if(!bHas2) { sDims.insert(vKLDivTmp[idx].m_iD2); vBestDims[iC][iBestDims-iFound-1]=vKLDivTmp[idx].m_iD2; iFound++; } } else if(iFound+1<=iBestDims) { //can add only 1 if(bHas1 && !bHas2) { sDims.insert(vKLDivTmp[idx].m_iD2); vBestDims[iC][iBestDims-iFound-1]=vKLDivTmp[idx].m_iD2; iFound++; } else if(bHas2 && !bHas1) { sDims.insert(vKLDivTmp[idx].m_iD1); //store which dims we already have vBestDims[iC][iBestDims-iFound-1]=vKLDivTmp[idx].m_iD1; iFound++; } else if(!bHas1) { 
sDims.insert(vKLDivTmp[idx].m_iD1); //store which dims we already have vBestDims[iC][iBestDims-iFound-1]=vKLDivTmp[idx].m_iD1; iFound++; } else if(!bHas2) // can't ever go here { sDims.insert(vKLDivTmp[idx].m_iD2); vBestDims[iC][iBestDims-iFound-1]=vKLDivTmp[idx].m_iD2; iFound++; } /* fun with truth tables has1 has2 !has1 !has2 has1 && !has2 has2 && !has1 true true false false false false true false false true true false false true true false false true false false true true false false */ } } #if 0 Write2Log("\nClust%d 2D kldiv pairs(best 16) info follows:\n",iC); LogF F; FILE* fp = F.Open(); int y=iK-16>=0?iK-16:0; for(;y<iK;y++) { fprintf(fp,"pair%d D1=%s D2=%s kld=%.4f\n", y,*vAxes[vKLDivTmp[y].m_iD1],*vAxes[vKLDivTmp[y].m_iD2],vKLDivTmp[y].m_kld); } fprintf(fp,"\n\n"); #endif } return true; }